140 using std::make_pair;
// Class-wide debug verbosity switch for MethodBDT (0 = off).
// Static member: shared by every MethodBDT instance in the process.
146 const
Int_t TMVA::MethodBDT::fgDebugLevel = 0;
151 TMVA::MethodBDT::MethodBDT( const
TString& jobName,
153 DataSetInfo& theData,
156 TMVA::MethodBase( jobName, Types::kBDT, methodTitle, theData, theOption, theTargetDir )
159 , fSigToBkgFraction(0)
161 , fTransitionPoint(0)
164 , fBaggedGradBoost(kFALSE)
168 , fMinNodeSizeS("5%")
171 , fMinLinCorrForFisher(.8)
172 , fUseExclusiveVars(0)
173 , fUseYesNoLeaf(kFALSE)
174 , fNodePurityLimit(0)
177 , fPruneMethod(DecisionTree::kNoPruning)
179 , fFValidationEvents(0)
181 , fRandomisedTrees(kFALSE)
183 , fUsePoissonNvars(0)
184 , fUseNTrainEvents(0)
185 , fBaggedSampleFraction(0)
186 , fNoNegWeightsInTraining(kFALSE)
187 , fInverseBoostNegWeights(kFALSE)
188 , fPairNegWeightsGlobal(kFALSE)
189 , fTrainWithNegWeights(kFALSE)
190 , fDoBoostMonitor(kFALSE)
198 , fDoPreselection(kFALSE)
199 , fHistoricBool(kFALSE)
201 fMonitorNtuple =
NULL;
213 , fSigToBkgFraction(0)
215 , fTransitionPoint(0)
218 , fBaggedGradBoost(
kFALSE)
222 , fMinNodeSizeS(
"5%")
225 , fMinLinCorrForFisher(.8)
226 , fUseExclusiveVars(0)
228 , fNodePurityLimit(0)
233 , fFValidationEvents(0)
235 , fRandomisedTrees(
kFALSE)
237 , fUsePoissonNvars(0)
238 , fUseNTrainEvents(0)
239 , fBaggedSampleFraction(0)
240 , fNoNegWeightsInTraining(
kFALSE)
241 , fInverseBoostNegWeights(
kFALSE)
242 , fPairNegWeightsGlobal(
kFALSE)
243 , fTrainWithNegWeights(
kFALSE)
313 DeclareOptionRef(fNTrees,
"NTrees",
"Number of trees in the forest");
314 if (DoRegression()) {
315 DeclareOptionRef(fMaxDepth=50,
"MaxDepth",
"Max depth of the decision tree allowed");
317 DeclareOptionRef(fMaxDepth=3,
"MaxDepth",
"Max depth of the decision tree allowed");
320 TString tmp=
"5%";
if (DoRegression()) tmp=
"0.2%";
321 DeclareOptionRef(fMinNodeSizeS=tmp,
"MinNodeSize",
"Minimum percentage of training events required in a leaf node (default: Classification: 5%, Regression: 0.2%)");
323 DeclareOptionRef(fNCuts,
"nCuts",
"Number of grid points in variable range used in finding optimal cut in node splitting");
325 DeclareOptionRef(fBoostType,
"BoostType",
"Boosting type for the trees in the forest (note: AdaCost is still experimental)");
327 AddPreDefVal(
TString(
"AdaBoost"));
328 AddPreDefVal(
TString(
"RealAdaBoost"));
329 AddPreDefVal(
TString(
"AdaCost"));
330 AddPreDefVal(
TString(
"Bagging"));
332 AddPreDefVal(
TString(
"AdaBoostR2"));
334 if (DoRegression()) {
335 fBoostType =
"AdaBoostR2";
337 fBoostType =
"AdaBoost";
339 DeclareOptionRef(fAdaBoostR2Loss=
"Quadratic",
"AdaBoostR2Loss",
"Type of Loss function in AdaBoostR2");
340 AddPreDefVal(
TString(
"Linear"));
341 AddPreDefVal(
TString(
"Quadratic"));
342 AddPreDefVal(
TString(
"Exponential"));
344 DeclareOptionRef(fBaggedBoost=
kFALSE,
"UseBaggedBoost",
"Use only a random subsample of all events for growing the trees in each boost iteration.");
345 DeclareOptionRef(fShrinkage=1.0,
"Shrinkage",
"Learning rate for GradBoost algorithm");
346 DeclareOptionRef(fAdaBoostBeta=.5,
"AdaBoostBeta",
"Learning rate for AdaBoost algorithm");
347 DeclareOptionRef(fRandomisedTrees,
"UseRandomisedTrees",
"Determine at each node splitting the cut variable only as the best out of a random subset of variables (like in RandomForests)");
348 DeclareOptionRef(fUseNvars,
"UseNvars",
"Size of the subset of variables used with RandomisedTree option");
349 DeclareOptionRef(fUsePoissonNvars,
"UsePoissonNvars",
"Interpret \"UseNvars\" not as fixed number but as mean of a Possion distribution in each split with RandomisedTree option");
350 DeclareOptionRef(fBaggedSampleFraction=.6,
"BaggedSampleFraction",
"Relative size of bagged event sample to original size of the data sample (used whenever bagging is used (i.e. UseBaggedBoost, Bagging,)" );
352 DeclareOptionRef(fUseYesNoLeaf=
kTRUE,
"UseYesNoLeaf",
353 "Use Sig or Bkg categories, or the purity=S/(S+B) as classification of the leaf node -> Real-AdaBoost");
354 if (DoRegression()) {
358 DeclareOptionRef(fNegWeightTreatment=
"InverseBoostNegWeights",
"NegWeightTreatment",
"How to treat events with negative weights in the BDT training (particular the boosting) : IgnoreInTraining; Boost With inverse boostweight; Pair events with negative and positive weights in traning sample and *annihilate* them (experimental!)");
359 AddPreDefVal(
TString(
"InverseBoostNegWeights"));
360 AddPreDefVal(
TString(
"IgnoreNegWeightsInTraining"));
361 AddPreDefVal(
TString(
"NoNegWeightsInTraining"));
362 AddPreDefVal(
TString(
"PairNegWeightsGlobal"));
367 DeclareOptionRef(fCss=1.,
"Css",
"AdaCost: cost of true signal selected signal");
368 DeclareOptionRef(fCts_sb=1.,
"Cts_sb",
"AdaCost: cost of true signal selected bkg");
369 DeclareOptionRef(fCtb_ss=1.,
"Ctb_ss",
"AdaCost: cost of true bkg selected signal");
370 DeclareOptionRef(fCbb=1.,
"Cbb",
"AdaCost: cost of true bkg selected bkg ");
372 DeclareOptionRef(fNodePurityLimit=0.5,
"NodePurityLimit",
"In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");
375 DeclareOptionRef(fSepTypeS,
"SeparationType",
"Separation criterion for node splitting");
376 AddPreDefVal(
TString(
"CrossEntropy"));
377 AddPreDefVal(
TString(
"GiniIndex"));
378 AddPreDefVal(
TString(
"GiniIndexWithLaplace"));
379 AddPreDefVal(
TString(
"MisClassificationError"));
380 AddPreDefVal(
TString(
"SDivSqrtSPlusB"));
381 AddPreDefVal(
TString(
"RegressionVariance"));
382 if (DoRegression()) {
383 fSepTypeS =
"RegressionVariance";
385 fSepTypeS =
"GiniIndex";
388 DeclareOptionRef(fDoBoostMonitor=
kFALSE,
"DoBoostMonitor",
"Create control plot with ROC integral vs tree number");
390 DeclareOptionRef(fUseFisherCuts=
kFALSE,
"UseFisherCuts",
"Use multivariate splits using the Fisher criterion");
391 DeclareOptionRef(fMinLinCorrForFisher=.8,
"MinLinCorrForFisher",
"The minimum linear correlation between two variables demanded for use in Fisher criterion in node splitting");
392 DeclareOptionRef(fUseExclusiveVars=
kFALSE,
"UseExclusiveVars",
"Variables already used in fisher criterion are not anymore analysed individually for node splitting");
395 DeclareOptionRef(fDoPreselection=
kFALSE,
"DoPreselection",
"and and apply automatic pre-selection for 100% efficient signal (bkg) cuts prior to training");
398 DeclareOptionRef(fSigToBkgFraction=1,
"SigToBkgFraction",
"Sig to Bkg ratio used in Training (similar to NodePurityLimit, which cannot be used in real adaboost");
400 DeclareOptionRef(fPruneMethodS,
"PruneMethod",
"Note: for BDTs use small trees (e.g.MaxDepth=3) and NoPruning: Pruning: Method used for pruning (removal) of statistically insignificant branches ");
401 AddPreDefVal(
TString(
"NoPruning"));
402 AddPreDefVal(
TString(
"ExpectedError"));
403 AddPreDefVal(
TString(
"CostComplexity"));
405 DeclareOptionRef(fPruneStrength,
"PruneStrength",
"Pruning strength");
407 DeclareOptionRef(fFValidationEvents=0.5,
"PruningValFraction",
"Fraction of events to use for optimizing automatic pruning.");
410 DeclareOptionRef(fMinNodeEvents=0,
"nEventsMin",
"deprecated: Use MinNodeSize (in % of training events) instead");
412 DeclareOptionRef(fBaggedGradBoost=
kFALSE,
"UseBaggedGrad",
"deprecated: Use *UseBaggedBoost* instead: Use only a random subsample of all events for growing the trees in each iteration.");
413 DeclareOptionRef(fBaggedSampleFraction,
"GradBaggingFraction",
"deprecated: Use *BaggedSampleFraction* instead: Defines the fraction of events to be used in each iteration, e.g. when UseBaggedGrad=kTRUE. ");
414 DeclareOptionRef(fUseNTrainEvents,
"UseNTrainEvents",
"deprecated: Use *BaggedSampleFraction* instead: Number of randomly picked training events used in randomised (and bagged) trees");
415 DeclareOptionRef(fNNodesMax,
"NNodesMax",
"deprecated: Use MaxDepth instead to limit the tree size" );
427 DeclareOptionRef(fHistoricBool=
kTRUE,
"UseWeightedTrees",
428 "Use weighted trees or simple average in classification from the forest");
429 DeclareOptionRef(fHistoricBool=
kFALSE,
"PruneBeforeBoost",
"Flag to prune the tree before applying boosting algorithm");
430 DeclareOptionRef(fHistoricBool=
kFALSE,
"RenormByClass",
"Individually re-normalize each event class to the original size after boosting");
432 AddPreDefVal(
TString(
"NegWeightTreatment"),
TString(
"IgnoreNegWeights"));
446 else if (fSepTypeS ==
"giniindex") fSepType =
new GiniIndex();
448 else if (fSepTypeS ==
"crossentropy") fSepType =
new CrossEntropy();
449 else if (fSepTypeS ==
"sdivsqrtsplusb") fSepType =
new SdivSqrtSplusB();
450 else if (fSepTypeS ==
"regressionvariance") fSepType =
NULL;
453 Log() <<
kFATAL <<
"<ProcessOptions> unknown Separation Index option " << fSepTypeS <<
" called" <<
Endl;
456 fPruneMethodS.ToLower();
462 Log() <<
kFATAL <<
"<ProcessOptions> unknown PruneMethod " << fPruneMethodS <<
" option called" <<
Endl;
468 <<
"Sorry autmoatic pruning strength determination is not implemented yet for ExpectedErrorPruning" <<
Endl;
472 if (fMinNodeEvents > 0){
473 fMinNodeSize =
Double_t(fMinNodeEvents*100.) /
Data()->GetNTrainingEvents();
474 Log() <<
kWARNING <<
"You have explicitly set ** nEventsMin = " << fMinNodeEvents<<
" ** the min ablsolut number \n"
475 <<
"of events in a leaf node. This is DEPRECATED, please use the option \n"
476 <<
"*MinNodeSize* giving the relative number as percentage of training \n"
477 <<
"events instead. \n"
478 <<
"nEventsMin="<<fMinNodeEvents<<
"--> MinNodeSize="<<fMinNodeSize<<
"%"
480 Log() <<
kWARNING <<
"Note also that explicitly setting *nEventsMin* so far OVERWRITES the option recomeded \n"
481 <<
" *MinNodeSize* = " << fMinNodeSizeS <<
" option !!" <<
Endl ;
482 fMinNodeSizeS =
Form(
"%F3.2",fMinNodeSize);
485 SetMinNodeSize(fMinNodeSizeS);
489 fAdaBoostR2Loss.ToLower();
491 if (fBoostType==
"Grad") {
493 if (fNegWeightTreatment==
"InverseBoostNegWeights"){
494 Log() <<
kWARNING <<
"the option *InverseBoostNegWeights* does not exist for BoostType=Grad --> change to *IgnoreNegWeightsInTraining*" <<
Endl;
495 fNegWeightTreatment=
"IgnoreNegWeightsInTraining";
496 fNoNegWeightsInTraining=
kTRUE;
498 }
else if (fBoostType==
"RealAdaBoost"){
499 fBoostType =
"AdaBoost";
501 }
else if (fBoostType==
"AdaCost"){
505 if (fFValidationEvents < 0.0) fFValidationEvents = 0.0;
506 if (fAutomatic && fFValidationEvents > 0.5) {
507 Log() <<
kWARNING <<
"You have chosen to use more than half of your training sample "
508 <<
"to optimize the automatic pruning algorithm. This is probably wasteful "
509 <<
"and your overall results will be degraded. Are you sure you want this?"
514 if (this->
Data()->HasNegativeEventWeights()){
515 Log() <<
kINFO <<
" You are using a Monte Carlo that has also negative weights. "
516 <<
"That should in principle be fine as long as on average you end up with "
517 <<
"something positive. For this you have to make sure that the minimal number "
518 <<
"of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize="
519 << fMinNodeSizeS <<
" ("<< fMinNodeSize <<
"%)"
520 <<
", (or the deprecated equivalent nEventsMin) you can set this via the "
521 <<
"BDT option string when booking the "
522 <<
"classifier) is large enough to allow for reasonable averaging!!! "
523 <<
" If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining "
524 <<
"which ignores events with negative weight in the training. " <<
Endl
525 <<
Endl <<
"Note: You'll get a WARNING message during the training if that should ever happen" <<
Endl;
528 if (DoRegression()) {
529 if (fUseYesNoLeaf && !IsConstructedFromWeightFile()){
530 Log() <<
kWARNING <<
"Regression Trees do not work with fUseYesNoLeaf=TRUE --> I will set it to FALSE" <<
Endl;
534 if (fSepType !=
NULL){
535 Log() <<
kWARNING <<
"Regression Trees do not work with Separation type other than <RegressionVariance> --> I will use it instead" <<
Endl;
539 Log() <<
kWARNING <<
"Sorry, UseFisherCuts is not available for regression analysis, I will ignore it!" <<
Endl;
543 Log() <<
kWARNING <<
"Sorry, the option of nCuts<0 using a more elaborate node splitting algorithm " <<
Endl;
544 Log() <<
kWARNING <<
"is not implemented for regression analysis ! " <<
Endl;
545 Log() <<
kWARNING <<
"--> I switch do default nCuts = 20 and use standard node splitting"<<
Endl;
549 if (fRandomisedTrees){
550 Log() <<
kINFO <<
" Randomised trees use no pruning" <<
Endl;
555 if (fUseFisherCuts) {
556 Log() <<
kWARNING <<
"Sorry, when using the option UseFisherCuts, the other option nCuts<0 (i.e. using" <<
Endl;
557 Log() <<
kWARNING <<
" a more elaborate node splitting algorithm) is not implemented. I will switch o " <<
Endl;
558 Log() <<
kWARNING <<
"--> I switch do default nCuts = 20 and use standard node splitting WITH possible Fisher criteria"<<
Endl;
563 Log() <<
kERROR <<
" Zero Decision Trees demanded... that does not work !! "
564 <<
" I set it to 1 .. just so that the program does not crash"
569 fNegWeightTreatment.ToLower();
570 if (fNegWeightTreatment ==
"ignorenegweightsintraining") fNoNegWeightsInTraining =
kTRUE;
571 else if (fNegWeightTreatment ==
"nonegweightsintraining") fNoNegWeightsInTraining =
kTRUE;
572 else if (fNegWeightTreatment ==
"inverseboostnegweights") fInverseBoostNegWeights =
kTRUE;
573 else if (fNegWeightTreatment ==
"pairnegweightsglobal") fPairNegWeightsGlobal =
kTRUE;
574 else if (fNegWeightTreatment ==
"pray")
Log() <<
kWARNING <<
"Yes, good luck with praying " <<
Endl;
577 Log() <<
kFATAL <<
"<ProcessOptions> unknown option for treating negative event weights during training " << fNegWeightTreatment <<
" requested" <<
Endl;
580 if (fNegWeightTreatment ==
"pairnegweightsglobal")
581 Log() <<
kWARNING <<
" you specified the option NegWeightTreatment=PairNegWeightsGlobal : This option is still considered EXPERIMENTAL !! " <<
Endl;
588 while (tmp < fNNodesMax){
592 Log() <<
kWARNING <<
"You have specified a deprecated option *NNodesMax="<<fNNodesMax
593 <<
"* \n this has been translated to MaxDepth="<<fMaxDepth<<
Endl;
597 if (fUseNTrainEvents>0){
598 fBaggedSampleFraction = (
Double_t) fUseNTrainEvents/
Data()->GetNTrainingEvents();
599 Log() <<
kWARNING <<
"You have specified a deprecated option *UseNTrainEvents="<<fUseNTrainEvents
600 <<
"* \n this has been translated to BaggedSampleFraction="<<fBaggedSampleFraction<<
"(%)"<<
Endl;
603 if (fBoostType==
"Bagging") fBaggedBoost =
kTRUE;
604 if (fBaggedGradBoost){
605 fBaggedBoost =
kTRUE;
606 Log() <<
kWARNING <<
"You have specified a deprecated option *UseBaggedGrad* --> please use *UseBaggedBoost* instead" <<
Endl;
615 if (sizeInPercent > 0 && sizeInPercent < 50){
616 fMinNodeSize=sizeInPercent;
619 Log() <<
kFATAL <<
"you have demanded a minimal node size of "
620 << sizeInPercent <<
"% of the training events.. \n"
621 <<
" that somehow does not make sense "<<
Endl;
630 if (sizeInPercent.
IsFloat()) SetMinNodeSize(sizeInPercent.
Atof());
632 Log() <<
kFATAL <<
"I had problems reading the option MinNodeEvents, which "
633 <<
"after removing a possible % sign now reads " << sizeInPercent <<
Endl;
647 fBoostType =
"AdaBoost";
648 if(DataInfo().GetNClasses()!=0)
652 fBoostType =
"AdaBoostR2";
653 fAdaBoostR2Loss =
"Quadratic";
654 if(DataInfo().GetNClasses()!=0)
660 fPruneMethodS =
"NoPruning";
664 fFValidationEvents = 0.5;
665 fRandomisedTrees =
kFALSE;
668 fUsePoissonNvars =
kTRUE;
673 SetSignalReferenceCut( 0 );
686 for (
UInt_t i=0; i<fForest.size(); i++)
delete fForest[i];
689 fBoostWeights.clear();
690 if (fMonitorNtuple) { fMonitorNtuple->Delete(); fMonitorNtuple=
NULL; }
691 fVariableImportance.clear();
697 Log() <<
kDEBUG <<
" successfully(?) reset the method " <<
Endl;
710 for (
UInt_t i=0; i<fForest.size(); i++)
delete fForest[i];
718 if (!HasTrainingTree())
Log() <<
kFATAL <<
"<Init> Data().TrainingTree() is zero pointer" <<
Endl;
720 if (fEventSample.size() > 0) {
722 for (
UInt_t iev=0; iev<fEventSample.size(); iev++) fEventSample[iev]->SetBoostWeight(1.);
725 UInt_t nevents =
Data()->GetNTrainingEvents();
727 std::vector<const TMVA::Event*> tmpEventSample;
728 for (
Long64_t ievt=0; ievt<nevents; ievt++) {
730 Event*
event =
new Event( *GetTrainingEvent(ievt) );
731 tmpEventSample.push_back(event);
734 if (!DoRegression()) DeterminePreselectionCuts(tmpEventSample);
735 else fDoPreselection =
kFALSE;
737 for (
UInt_t i=0; i<tmpEventSample.size(); i++)
delete tmpEventSample[i];
742 for (
Long64_t ievt=0; ievt<nevents; ievt++) {
745 Event*
event =
new Event( *GetTrainingEvent(ievt) );
746 if (fDoPreselection){
747 if (
TMath::Abs(ApplyPreselectionCuts(event)) > 0.05) {
753 if (event->GetWeight() < 0 && (IgnoreEventsWithNegWeightsInTraining() || fNoNegWeightsInTraining)){
754 if (firstNegWeight) {
755 Log() <<
kWARNING <<
" Note, you have events with negative event weight in the sample, but you've chosen to ignore them" <<
Endl;
759 }
else if (event->GetWeight()==0){
760 if (firstZeroWeight) {
762 Log() <<
"Events with weight == 0 are going to be simply ignored " <<
Endl;
766 if (event->GetWeight() < 0) {
767 fTrainWithNegWeights=
kTRUE;
770 if (fPairNegWeightsGlobal){
771 Log() <<
kWARNING <<
"Events with negative event weights are found and "
772 <<
" will be removed prior to the actual BDT training by global "
773 <<
" paring (and subsequent annihilation) with positiv weight events"
776 Log() <<
kWARNING <<
"Events with negative event weights are USED during "
777 <<
"the BDT training. This might cause problems with small node sizes "
778 <<
"or with the boosting. Please remove negative events from training "
779 <<
"using the option *IgnoreEventsWithNegWeightsInTraining* in case you "
780 <<
"observe problems with the boosting"
787 Double_t modulo = 1.0/(fFValidationEvents);
788 Int_t imodulo =
static_cast<Int_t>( fmod(modulo,1.0) > 0.5 ?
ceil(modulo) :
floor(modulo) );
789 if (ievt % imodulo == 0) fValidationSample.push_back( event );
790 else fEventSample.push_back( event );
793 fEventSample.push_back(event);
799 Log() <<
kINFO <<
"<InitEventSample> Internally I use " << fEventSample.size()
800 <<
" for Training and " << fValidationSample.size()
801 <<
" for Pruning Validation (" << ((
Float_t)fValidationSample.size())/((
Float_t)fEventSample.size()+fValidationSample.size())*100.0
802 <<
"% of training used for validation)" << Endl;
806 if (fPairNegWeightsGlobal) PreProcessNegativeEventWeights();
809 if (!DoRegression()){
810 Log() <<
kINFO <<
"<InitEventSample> For classification trees, "<<
Endl;
811 Log() <<
kINFO <<
" the effective number of backgrounds is scaled to match "<<
Endl;
812 Log() <<
kINFO <<
" the signal. Othersise the first boosting step would do 'just that'!"<<
Endl;
826 Double_t nevents = fEventSample.size();
828 Int_t sumSig=0, sumBkg=0;
829 for (
UInt_t ievt=0; ievt<fEventSample.size(); ievt++) {
830 if ((DataInfo().IsSignal(fEventSample[ievt])) ) {
831 sumSigW += fEventSample[ievt]->GetWeight();
834 sumBkgW += fEventSample[ievt]->GetWeight();
838 if (sumSigW && sumBkgW){
839 Double_t normSig = nevents/((1+fSigToBkgFraction)*sumSigW)*fSigToBkgFraction;
840 Double_t normBkg = nevents/((1+fSigToBkgFraction)*sumBkgW); ;
841 Log() <<
kINFO <<
"re-normlise events such that Sig and Bkg have respective sum of weights = "
842 << fSigToBkgFraction <<
Endl;
843 Log() <<
kINFO <<
" sig->sig*"<<normSig <<
"ev. bkg->bkg*"<<normBkg <<
"ev." <<
Endl;
844 Log() <<
kINFO <<
"#events: (reweighted) sig: "<< sumSigW*normSig <<
" bkg: " << sumBkgW*normBkg <<
Endl;
845 Log() <<
kINFO <<
"#events: (unweighted) sig: "<< sumSig <<
" bkg: " << sumBkg <<
Endl;
846 for (
Long64_t ievt=0; ievt<nevents; ievt++) {
847 if ((DataInfo().IsSignal(fEventSample[ievt])) ) fEventSample[ievt]->SetBoostWeight(normSig);
848 else fEventSample[ievt]->SetBoostWeight(normBkg);
851 Log() <<
kINFO <<
"--> could not determine scaleing factors as either there are " <<
Endl;
852 Log() <<
kINFO <<
" no signal events (sumSigW="<<sumSigW<<
") or no bkg ev. (sumBkgW="<<sumBkgW<<
")"<<
Endl;
857 fTrainSample = &fEventSample;
859 GetBaggedSubSample(fEventSample);
860 fTrainSample = &fSubSample;
886 std::vector<const Event*> negEvents;
887 for (
UInt_t iev = 0; iev < fEventSample.size(); iev++){
888 if (fEventSample[iev]->
GetWeight() < 0) {
889 totalNegWeights += fEventSample[iev]->GetWeight();
890 negEvents.push_back(fEventSample[iev]);
892 totalPosWeights += fEventSample[iev]->GetWeight();
894 totalWeights += fEventSample[iev]->GetWeight();
896 if (totalNegWeights == 0 ) {
897 Log() <<
kINFO <<
"no negative event weights found .. no preprocessing necessary" <<
Endl;
900 Log() <<
kINFO <<
"found a total of " << totalNegWeights <<
" of negative event weights which I am going to try to pair with positive events to annihilate them" <<
Endl;
901 Log() <<
kINFO <<
"found a total of " << totalPosWeights <<
" of events with positive weights" <<
Endl;
902 Log() <<
kINFO <<
"--> total sum of weights = " << totalWeights <<
" = " << totalNegWeights+totalPosWeights <<
Endl;
909 for (
Int_t i=0; i<2; i++){
910 invCov = ((*cov)[i]);
912 std::cout <<
"<MethodBDT::PreProcessNeg...> matrix is almost singular with deterninant="
914 <<
" did you use the variables that are linear combinations or highly correlated?"
918 std::cout <<
"<MethodBDT::PreProcessNeg...> matrix is singular with determinant="
920 <<
" did you use the variables that are linear combinations?"
929 Log() <<
kINFO <<
"Found a total of " << totalNegWeights <<
" in negative weights out of " << fEventSample.size() <<
" training events " <<
Endl;
930 Timer timer(negEvents.size(),
"Negative Event paired");
931 for (
UInt_t nev = 0; nev < negEvents.size(); nev++){
932 timer.DrawProgressBar( nev );
933 Double_t weight = negEvents[nev]->GetWeight();
934 UInt_t iClassID = negEvents[nev]->GetClass();
935 invCov = ((*cov)[iClassID]);
941 for (
UInt_t iev = 0; iev < fEventSample.size(); iev++){
942 if (iClassID==fEventSample[iev]->
GetClass() && fEventSample[iev]->GetWeight() > 0){
944 for (
UInt_t ivar=0; ivar < GetNvar(); ivar++){
945 for (
UInt_t jvar=0; jvar<GetNvar(); jvar++){
946 dist += (negEvents[nev]->GetValue(ivar)-fEventSample[iev]->GetValue(ivar))*
947 (*invCov)[ivar][jvar]*
948 (negEvents[nev]->GetValue(jvar)-fEventSample[iev]->GetValue(jvar));
951 if (dist < minDist) { iMin=iev; minDist=
dist;}
957 Double_t newWeight = (negEvents[nev]->GetWeight() + fEventSample[iMin]->GetWeight());
959 negEvents[nev]->SetBoostWeight( 0 );
960 fEventSample[iMin]->SetBoostWeight( newWeight/fEventSample[iMin]->GetOriginalWeight() );
962 negEvents[nev]->SetBoostWeight( newWeight/negEvents[nev]->GetOriginalWeight() );
963 fEventSample[iMin]->SetBoostWeight( 0 );
966 }
else Log() <<
kFATAL <<
"preprocessing didn't find event to pair with the negative weight ... probably a bug" <<
Endl;
967 weight = negEvents[nev]->GetWeight();
970 Log() <<
kINFO <<
"<Negative Event Pairing> took: " <<
timer.GetElapsedTime()
982 std::vector<const Event*> newEventSample;
984 for (
UInt_t iev = 0; iev < fEventSample.size(); iev++){
985 if (fEventSample[iev]->
GetWeight() < 0) {
986 totalNegWeights += fEventSample[iev]->GetWeight();
987 totalWeights += fEventSample[iev]->GetWeight();
989 totalPosWeights += fEventSample[iev]->GetWeight();
990 totalWeights += fEventSample[iev]->GetWeight();
992 if (fEventSample[iev]->
GetWeight() > 0) {
993 newEventSample.push_back(
new Event(*fEventSample[iev]));
994 if (fEventSample[iev]->
GetClass() == fSignalClass){
995 sigWeight += fEventSample[iev]->GetWeight();
998 bkgWeight += fEventSample[iev]->GetWeight();
1003 if (totalNegWeights < 0)
Log() <<
kFATAL <<
" compenstion of negative event weights with positive ones did not work " << totalNegWeights <<
Endl;
1005 for (
UInt_t i=0; i<fEventSample.size(); i++)
delete fEventSample[i];
1006 fEventSample = newEventSample;
1008 Log() <<
kINFO <<
" after PreProcessing, the Event sample is left with " << fEventSample.size() <<
" events (unweighted), all with positive weights, adding up to " << totalWeights <<
Endl;
1009 Log() <<
kINFO <<
" nSig="<<nSig <<
" sigWeight="<<sigWeight <<
" nBkg="<<nBkg <<
" bkgWeight="<<bkgWeight <<
Endl;
1023 std::map<TString,TMVA::Interval*> tuneParameters;
1024 std::map<TString,Double_t> tunedParameters;
1033 tuneParameters.insert(std::pair<TString,Interval*>(
"NTrees",
new Interval(10,1000,5)));
1034 tuneParameters.insert(std::pair<TString,Interval*>(
"MaxDepth",
new Interval(2,4,3)));
1035 tuneParameters.insert(std::pair<TString,Interval*>(
"MinNodeSize",
new LogInterval(1,30,30)));
1040 if (fBoostType==
"AdaBoost"){
1041 tuneParameters.insert(std::pair<TString,Interval*>(
"AdaBoostBeta",
new Interval(.2,1.,5)));
1043 }
else if (fBoostType==
"Grad"){
1044 tuneParameters.insert(std::pair<TString,Interval*>(
"Shrinkage",
new Interval(0.05,0.50,5)));
1046 }
else if (fBoostType==
"Bagging" && fRandomisedTrees){
1049 tuneParameters.insert(std::pair<TString,Interval*>(
"UseNvars",
new Interval(min_var,max_var,4)));
1053 Log()<<
kINFO <<
" the following BDT parameters will be tuned on the respective *grid*\n"<<
Endl;
1054 std::map<TString,TMVA::Interval*>::iterator it;
1055 for(it=tuneParameters.begin(); it!= tuneParameters.end(); it++){
1062 tunedParameters=optimize.
optimize();
1064 return tunedParameters;
1073 std::map<TString,Double_t>::iterator it;
1074 for(it=tuneParameters.begin(); it!= tuneParameters.end(); it++){
1076 if (it->first ==
"MaxDepth" ) SetMaxDepth ((
Int_t)it->second);
1077 else if (it->first ==
"MinNodeSize" ) SetMinNodeSize (it->second);
1078 else if (it->first ==
"NTrees" ) SetNTrees ((
Int_t)it->second);
1079 else if (it->first ==
"NodePurityLimit") SetNodePurityLimit (it->second);
1080 else if (it->first ==
"AdaBoostBeta" ) SetAdaBoostBeta (it->second);
1081 else if (it->first ==
"Shrinkage" ) SetShrinkage (it->second);
1082 else if (it->first ==
"UseNvars" ) SetUseNvars ((
Int_t)it->second);
1083 else if (it->first ==
"BaggedSampleFraction" ) SetBaggedSampleFraction (it->second);
1084 else Log() <<
kFATAL <<
" SetParameter for " << it->first <<
" not yet implemented " <<
Endl;
1103 Log() <<
kERROR <<
" Zero Decision Trees demanded... that does not work !! "
1104 <<
" I set it to 1 .. just so that the program does not crash"
1111 if (IsNormalised())
Log() <<
kFATAL <<
"\"Normalise\" option cannot be used with BDT; "
1112 <<
"please remove the option from the configuration string, or "
1113 <<
"use \"!Normalise\""
1116 Log() <<
kINFO <<
"Training "<< fNTrees <<
" Decision Trees ... patience please" <<
Endl;
1118 Log() <<
kDEBUG <<
"Training with maximal depth = " <<fMaxDepth
1119 <<
", MinNodeEvents=" << fMinNodeEvents
1120 <<
", NTrees="<<fNTrees
1121 <<
", NodePurityLimit="<<fNodePurityLimit
1122 <<
", AdaBoostBeta="<<fAdaBoostBeta
1128 TString hname =
"AdaBooost weight distribution";
1134 if (DoRegression()) {
1138 hname=
"Boost event weights distribution";
1143 TH1*
h =
new TH1F(
"BoostWeight",hname,nBins,xMin,xMax);
1144 TH1* nodesBeforePruningVsTree =
new TH1I(
"NodesBeforePruning",
"nodes before pruning",fNTrees,0,fNTrees);
1145 TH1* nodesAfterPruningVsTree =
new TH1I(
"NodesAfterPruning",
"nodes after pruning",fNTrees,0,fNTrees);
1149 if(!DoMulticlass()){
1153 results->
Store(h,
"BoostWeights");
1157 if (fDoBoostMonitor){
1158 TH2* boostMonitor =
new TH2F(
"BoostMonitor",
"ROC Integral Vs iTree",2,0,fNTrees,2,0,1.05);
1160 boostMonitor->
SetYTitle(
"ROC Integral");
1161 results->
Store(boostMonitor,
"BoostMonitor");
1163 boostMonitorGraph->
SetName(
"BoostMonitorGraph");
1164 boostMonitorGraph->
SetTitle(
"ROCIntegralVsNTrees");
1165 results->
Store(boostMonitorGraph,
"BoostMonitorGraph");
1169 h =
new TH1F(
"BoostWeightVsTree",
"Boost weights vs tree",fNTrees,0,fNTrees);
1172 results->
Store(h,
"BoostWeightsVsTree");
1175 h =
new TH1F(
"ErrFractHist",
"error fraction vs tree number",fNTrees,0,fNTrees);
1178 results->
Store(h,
"ErrorFrac");
1181 nodesBeforePruningVsTree->
SetXTitle(
"#tree");
1182 nodesBeforePruningVsTree->
SetYTitle(
"#tree nodes");
1183 results->
Store(nodesBeforePruningVsTree);
1186 nodesAfterPruningVsTree->
SetXTitle(
"#tree");
1187 nodesAfterPruningVsTree->
SetYTitle(
"#tree nodes");
1188 results->
Store(nodesAfterPruningVsTree);
1192 fMonitorNtuple=
new TTree(
"MonitorNtuple",
"BDT variables");
1193 fMonitorNtuple->Branch(
"iTree",&fITree,
"iTree/I");
1194 fMonitorNtuple->Branch(
"boostWeight",&fBoostWeight,
"boostWeight/D");
1195 fMonitorNtuple->Branch(
"errorFraction",&fErrorFraction,
"errorFraction/D");
1198 Int_t nNodesBeforePruningCount = 0;
1199 Int_t nNodesAfterPruningCount = 0;
1201 Int_t nNodesBeforePruning = 0;
1202 Int_t nNodesAfterPruning = 0;
1205 if(fBoostType==
"Grad"){
1206 InitGradBoost(fEventSample);
1212 while (itree < fNTrees && continueBoost){
1225 if (fBoostType!=
"Grad"){
1226 Log() <<
kFATAL <<
"Multiclass is currently only supported by gradient boost. "
1227 <<
"Please change boost option accordingly (GradBoost)."
1230 UInt_t nClasses = DataInfo().GetNClasses();
1231 for (
UInt_t i=0;i<nClasses;i++){
1232 fForest.push_back(
new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), i,
1233 fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
1234 itree*nClasses+i, fNodePurityLimit, itree*nClasses+1));
1235 fForest.back()->SetNVars(GetNvar());
1236 if (fUseFisherCuts) {
1237 fForest.back()->SetUseFisherCuts();
1238 fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
1239 fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);
1243 nNodesBeforePruning = fForest.back()->BuildTree(*fTrainSample);
1244 Double_t bw = this->Boost(*fTrainSample, fForest.back(),i);
1246 fBoostWeights.push_back(bw);
1248 fBoostWeights.push_back(0);
1256 fForest.push_back(
new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), fSignalClass,
1257 fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
1258 itree, fNodePurityLimit, itree));
1259 fForest.back()->SetNVars(GetNvar());
1260 if (fUseFisherCuts) {
1261 fForest.back()->SetUseFisherCuts();
1262 fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
1263 fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);
1266 nNodesBeforePruning = fForest.back()->BuildTree(*fTrainSample);
1268 if (fUseYesNoLeaf && !DoRegression() && fBoostType!=
"Grad") {
1269 nNodesBeforePruning = fForest.back()->CleanTree();
1272 nNodesBeforePruningCount += nNodesBeforePruning;
1273 nodesBeforePruningVsTree->
SetBinContent(itree+1,nNodesBeforePruning);
1275 fForest.back()->SetPruneMethod(fPruneMethod);
1276 fForest.back()->SetPruneStrength(fPruneStrength);
1278 std::vector<const Event*> * validationSample =
NULL;
1279 if(fAutomatic) validationSample = &fValidationSample;
1281 Double_t bw = this->Boost(*fTrainSample, fForest.back());
1283 fBoostWeights.push_back(bw);
1285 fBoostWeights.push_back(0);
1297 if (fUseYesNoLeaf && !DoRegression() && fBoostType!=
"Grad"){
1298 fForest.back()->CleanTree();
1300 nNodesAfterPruning = fForest.back()->GetNNodes();
1301 nNodesAfterPruningCount += nNodesAfterPruning;
1302 nodesAfterPruningVsTree->
SetBinContent(itree+1,nNodesAfterPruning);
1305 fMonitorNtuple->Fill();
1306 if (fDoBoostMonitor){
1307 if (! DoRegression() ){
1308 if ( itree==fNTrees-1 || (!(itree%500)) ||
1309 (!(itree%250) && itree <1000)||
1310 (!(itree%100) && itree < 500)||
1311 (!(itree%50) && itree < 250)||
1312 (!(itree%25) && itree < 150)||
1313 (!(itree%10) && itree < 50)||
1314 (!(itree%5) && itree < 20)
1315 ) BoostMonitor(itree);
1326 Log() <<
kINFO <<
"<Train> average number of nodes (w/o pruning) : "
1327 << nNodesBeforePruningCount/GetNTrees() <<
Endl;
1330 Log() <<
kINFO <<
"<Train> average number of nodes before/after pruning : "
1331 << nNodesBeforePruningCount/GetNTrees() <<
" / "
1332 << nNodesAfterPruningCount/GetNTrees()
1340 Log() <<
kDEBUG <<
"Now I delete the privat data sample"<<
Endl;
1341 for (
UInt_t i=0; i<fEventSample.size(); i++)
delete fEventSample[i];
1342 for (
UInt_t i=0; i<fValidationSample.size(); i++)
delete fValidationSample[i];
1343 fEventSample.clear();
1344 fValidationSample.clear();
// NOTE(review): this file is a lossy extraction of TMVA's MethodBDT.cxx.
// The numbers fused into each line are the ORIGINAL source line numbers;
// gaps in that numbering mean interior lines (function headers, braces,
// declarations) are missing here. Comments below describe only what is
// visible — consult the full MethodBDT.cxx before editing.

// --- fragment: appears to be GetGradBoostMVA(e, nTrees) (orig. ~1355-1360) ---
// Sums the responses of the first nTrees trees for event e, then squashes
// the additive-model output into (-1, 1) via 2/(1+exp(-2*sum)) - 1.
1355 for (
UInt_t itree=0; itree<nTrees; itree++) {
1357 sum += fForest[itree]->CheckEvent(e,
kFALSE);
1360 return 2.0/(1.0+
exp(-2.0*sum))-1;
// --- fragment: UpdateTargets(eventSample, cls), multiclass branch (orig. ~1369-1380) ---
// Adds the newest tree's response to the per-class residual of each event;
// once the last class is reached, a softmax-style norm over residual
// differences is built (lines defining p_cls are missing from this extract).
1369 UInt_t nClasses = DataInfo().GetNClasses();
1370 for (std::vector<const TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1371 fResiduals[*e].at(cls)+=fForest.back()->CheckEvent(*e,
kFALSE);
1372 if(cls == nClasses-1){
1373 for(
UInt_t i=0;i<nClasses;i++){
1375 for(
UInt_t j=0;j<nClasses;j++){
// norm accumulates exp(residual_j - residual_i) over all classes j
1377 norm+=
exp(fResiduals[*e].
at(j)-fResiduals[*e].
at(i));
// gradient of the multiclass log-loss: (1 - p) for the true class, -p otherwise
1380 Double_t res = ((*e)->GetClass()==i)?(1.0-p_cls):(-p_cls);
// --- binary-classification branch of UpdateTargets (orig. ~1387-1390) ---
1387 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1388 fResiduals[*e].at(0)+=fForest.back()->CheckEvent(*e,
kFALSE);
// gradient of binomial log-likelihood: (y - p_sig) with y in {0,1}
// (the line computing p_sig is missing from this extract)
1390 Double_t res = (DataInfo().IsSignal(*e)?1:0)-p_sig;
// --- fragment: UpdateTargetsRegression (orig. ~1401-1422) ---
// Subtracts the newest tree's response from each event's weighted residual,
// then recomputes the Huber transition point as the 70% weighted quantile
// of |residual| and resets the regression targets accordingly.
1401 for (std::vector<const TMVA::Event*>::const_iterator e=fEventSample.begin(); e!=fEventSample.end();e++) {
1403 fWeightedResiduals[*e].first -= fForest.back()->CheckEvent(*e,
kFALSE);
1409 vector< std::pair<Double_t, Double_t> > temp;
1410 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++){
1411 temp.push_back(make_pair(
fabs(fWeightedResiduals[*e].first),fWeightedResiduals[*e].second));
1412 fSumOfWeights += (*e)->GetWeight();
1414 fTransitionPoint = GetWeightedQuantile(temp,0.7,fSumOfWeights);
// NOTE(review): loop index i used at orig. 1419 — its declaration/increment
// lines are not present in this extract.
1417 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1419 if(temp[i].first<=fTransitionPoint)
// Huber loss: beyond the transition point the target is clipped to
// +/- fTransitionPoint with the sign of the residual.
1422 const_cast<TMVA::Event*
>(*e)->
SetTarget(0,fTransitionPoint*(fWeightedResiduals[*e].first<0?-1.0:1.0));
// --- fragment: GetWeightedQuantile(vec, quantile, norm) (orig. ~1432-1439) ---
// Sorts (value, weight) pairs by value and walks until the cumulative weight
// exceeds norm*quantile; returns the value at that position.
1432 std::sort(vec.begin(), vec.end());
1434 while(i<vec.size() && temp <= norm*quantile){
1435 temp += vec[i].second;
// guard against reading past the end when all weight is consumed
1438 if (i >= vec.size())
return 0.;
1439 return vec[i].first;
// --- fragment: GradBoost(eventSample, dt, cls) (orig. ~1447-1469) ---
// Per-leaf Newton step for gradient boosting (classification):
// leaves[node][0] = sum(target * w), leaves[node][1] = sum(|t|(1-|t|) w^2);
// the leaf response is shrinkage/nClasses * [0]/[1].
1447 std::map<TMVA::DecisionTreeNode*,std::vector<Double_t> > leaves;
1448 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1449 Double_t weight = (*e)->GetWeight();
1451 if ((leaves[node]).empty()){
1452 (leaves[node]).push_back((*e)->GetTarget(cls)* weight);
1453 (leaves[node]).push_back(
fabs((*e)->GetTarget(cls))*(1.0-
fabs((*e)->GetTarget(cls))) * weight* weight);
1456 (leaves[node])[0]+=((*e)->GetTarget(cls)* weight);
1457 (leaves[node])[1]+=
fabs((*e)->GetTarget(cls))*(1.0-
fabs((*e)->GetTarget(cls))) * weight* weight;
1461 iLeave!=leaves.end();++iLeave){
// clamp the denominator to avoid division by (near) zero
1462 if ((iLeave->second)[1]<1e-30) (iLeave->second)[1]=1e-30;
1464 (iLeave->first)->SetResponse(fShrinkage/DataInfo().GetNClasses()*(iLeave->second)[0]/((iLeave->second)[1]));
// after setting leaf responses, recompute the gradient targets
1469 DoMulticlass() ? UpdateTargets(fEventSample, cls) : UpdateTargets(fEventSample);
// --- fragment: GradBoostRegression(eventSample, dt) (orig. ~1478-1499) ---
// Collects the (residual, weight) pairs per leaf, takes the weighted median
// of the residuals, adds a Huber-style shift, and sets the leaf response to
// shrinkage * (median + shift).
1478 std::map<TMVA::DecisionTreeNode*,Double_t > leaveWeights;
1479 std::map<TMVA::DecisionTreeNode*,vector< std::pair<Double_t, Double_t> > > leaves;
1481 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1483 (leaves[node]).push_back(make_pair(fWeightedResiduals[*e].first,(*e)->GetWeight()));
1484 (leaveWeights[node]) += (*e)->GetWeight();
1488 for (std::map<
TMVA::DecisionTreeNode*,vector< std::pair<Double_t, Double_t> > >::iterator iLeave=leaves.begin();
1489 iLeave!=leaves.end();++iLeave){
1491 Double_t ResidualMedian = GetWeightedQuantile(iLeave->second,0.5,leaveWeights[iLeave->first]);
1492 for(
UInt_t j=0;j<((iLeave->second).size());j++){
1493 diff = (iLeave->second)[j].first-ResidualMedian;
// average of sign(diff) * min(transitionPoint, |diff|) over leaf events
1494 shift+=1.0/((iLeave->second).size())*((diff<0)?-1.0:1.0)*
TMath::Min(fTransitionPoint,
fabs(diff));
1496 (iLeave->first)->SetResponse(fShrinkage*(ResidualMedian+shift));
1499 UpdateTargetsRegression(*fTrainSample);
// --- fragment: InitGradBoost(eventSample) (orig. ~1510-1546) ---
// Regression branch: seed the residuals with the raw targets and use the
// weighted median as the constant first "boost weight" of the model.
1510 std::vector<std::pair<Double_t, Double_t> > temp;
1512 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1513 fWeightedResiduals[*e]= make_pair((*e)->GetTarget(0), (*e)->GetWeight());
1514 fSumOfWeights+=(*e)->GetWeight();
1515 temp.push_back(make_pair(fWeightedResiduals[*e].first,fWeightedResiduals[*e].second));
1517 Double_t weightedMedian = GetWeightedQuantile(temp,0.5, fSumOfWeights);
1520 fBoostWeights.push_back(weightedMedian);
1521 std::map<const TMVA::Event*, std::pair<Double_t, Double_t> >::iterator res = fWeightedResiduals.begin();
1522 for (; res!=fWeightedResiduals.end(); ++res ) {
// center the residuals around the median
1524 (*res).second.first -= weightedMedian;
1527 UpdateTargetsRegression(*fTrainSample,
kTRUE);
// multiclass branch: initial target r = 1 - 1/K for the true class, -1/K otherwise
1531 else if(DoMulticlass()){
1532 UInt_t nClasses = DataInfo().GetNClasses();
1533 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1534 for (
UInt_t i=0;i<nClasses;i++){
1536 Double_t r = (*e)->GetClass()==i?(1-1.0/nClasses):(-1.0/nClasses);
1538 fResiduals[*e].push_back(0);
// binary branch: initial target r = y - 0.5 with y in {0,1}
1543 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1544 Double_t r = (DataInfo().IsSignal(*e)?1:0)-0.5;
1546 fResiduals[*e].push_back(0);
// --- fragment: TestTreeQuality(dt) (orig. ~1557-1568) ---
// Weighted fraction of validation events that the tree classifies correctly
// (signal iff the tree response exceeds fNodePurityLimit).
1557 for (
UInt_t ievt=0; ievt<fValidationSample.size(); ievt++) {
1558 Bool_t isSignalType= (dt->
CheckEvent(fValidationSample[ievt]) > fNodePurityLimit ) ? 1 : 0;
1560 if (isSignalType == (DataInfo().IsSignal(fValidationSample[ievt])) ) {
1561 ncorrect += fValidationSample[ievt]->GetWeight();
1564 nfalse += fValidationSample[ievt]->GetWeight();
1568 return ncorrect / (ncorrect + nfalse);
// --- fragment: Boost(eventSample, dt, cls) dispatch (orig. ~1579-1598) ---
// Routes to the concrete boosting algorithm selected by the BoostType option;
// unknown values are a fatal configuration error.
1579 if (fBoostType==
"AdaBoost") returnVal = this->AdaBoost (eventSample, dt);
1580 else if (fBoostType==
"AdaCost") returnVal = this->AdaCost (eventSample, dt);
1581 else if (fBoostType==
"Bagging") returnVal = this->Bagging ( );
1582 else if (fBoostType==
"RegBoost") returnVal = this->RegBoost (eventSample, dt);
1583 else if (fBoostType==
"AdaBoostR2") returnVal = this->AdaBoostR2(eventSample, dt);
1584 else if (fBoostType==
"Grad"){
1586 returnVal = this->GradBoostRegression(eventSample, dt);
1587 else if(DoMulticlass())
1588 returnVal = this->GradBoost (eventSample, dt, cls);
1590 returnVal = this->GradBoost (eventSample, dt);
1594 Log() <<
kFATAL <<
"<Boost> unknown boost option " << fBoostType<<
" called" <<
Endl;
// draw a fresh bagged sub-sample for the next iteration (when bagging is on)
1598 GetBaggedSubSample(fEventSample);
// --- fragment: BoostMonitor(iTree) (orig. ~1613-1669) ---
// Fills monitoring histograms at the current boosting step: MVA response on
// the test sample, per-variable distributions, and boost-weight spectra,
// split into signal and background.
1613 TH1F *tmpS =
new TH1F(
"tmpS",
"", 100 , -1., 1.00001 );
1614 TH1F *tmpB =
new TH1F(
"tmpB",
"", 100 , -1., 1.00001 );
1618 UInt_t signalClassNr = DataInfo().GetClassInfo(
"Signal")->GetNumber();
1629 for (
UInt_t iev=0; iev < nevents; iev++){
1630 const Event*
event = GetTestingEvent(iev);
1632 if (event->GetClass() == signalClassNr) {tmp=tmpS;}
1634 tmp->
Fill(PrivateGetMvaValue(event),event->GetWeight());
1638 std::vector<TH1F*> hS;
1639 std::vector<TH1F*> hB;
1640 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
1641 hS.push_back(
new TH1F(
Form(
"SigVar%dAtTree%d",ivar,iTree),
Form(
"SigVar%dAtTree%d",ivar,iTree),100,DataInfo().GetVariableInfo(ivar).GetMin(),DataInfo().GetVariableInfo(ivar).GetMax()));
1642 hB.push_back(
new TH1F(
Form(
"BkgVar%dAtTree%d",ivar,iTree),
Form(
"BkgVar%dAtTree%d",ivar,iTree),100,DataInfo().GetVariableInfo(ivar).GetMin(),DataInfo().GetVariableInfo(ivar).GetMax()));
// histograms are handed over to the results store (owns/serializes them)
1643 results->
Store(hS.back(),hS.back()->GetTitle());
1644 results->
Store(hB.back(),hB.back()->GetTitle());
// find the largest boost weight to set the histogram range
1648 for (
UInt_t iev=0; iev < fEventSample.size(); iev++){
1649 if (fEventSample[iev]->GetBoostWeight() >
max) max = 1.01*fEventSample[iev]->GetBoostWeight();
1651 TH1F *tmpBoostWeightsS =
new TH1F(
Form(
"BoostWeightsInTreeS%d",iTree),
Form(
"BoostWeightsInTreeS%d",iTree),100,0.,max);
1652 TH1F *tmpBoostWeightsB =
new TH1F(
Form(
"BoostWeightsInTreeB%d",iTree),
Form(
"BoostWeightsInTreeB%d",iTree),100,0.,max);
1653 results->
Store(tmpBoostWeightsS,tmpBoostWeightsS->
GetTitle());
1654 results->
Store(tmpBoostWeightsB,tmpBoostWeightsB->
GetTitle());
1656 TH1F *tmpBoostWeights;
1657 std::vector<TH1F*> *
h;
// route each training event to the signal or background histogram set
1659 for (
UInt_t iev=0; iev < fEventSample.size(); iev++){
1660 if (fEventSample[iev]->
GetClass() == signalClassNr) {
1661 tmpBoostWeights=tmpBoostWeightsS;
1664 tmpBoostWeights=tmpBoostWeightsB;
1667 tmpBoostWeights->
Fill(fEventSample[iev]->GetBoostWeight());
1668 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
1669 (*h)[ivar]->Fill(fEventSample[iev]->GetValue(ivar),fEventSample[iev]->
GetWeight());
// --- fragment: AdaBoost(eventSample, dt) (orig. ~1705-1863) ---
// Classic AdaBoost: measure the weighted misclassification rate of the new
// tree, derive a boost weight, re-weight misclassified events and renormalize.
1705 Double_t err=0, sumGlobalw=0, sumGlobalwfalse=0, sumGlobalwfalse2=0;
1707 std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);
1708 std::map<Node*,Int_t> sigEventsInNode;
// first pass: accumulate total weight and "wrong" weight per event
1711 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1714 UInt_t iclass=(*e)->GetClass();
1717 if ( DoRegression() ) {
1719 sumGlobalwfalse += w * tmpDev;
1720 sumGlobalwfalse2 += w * tmpDev*tmpDev;
1721 if (tmpDev > maxDev) maxDev = tmpDev;
// classification: event is "wrong" when the tree's signal call disagrees
// with the true class
1725 Bool_t isSignalType = (dt->
CheckEvent(*e,fUseYesNoLeaf) > fNodePurityLimit );
1726 if (!(isSignalType == DataInfo().IsSignal(*e))) {
1727 sumGlobalwfalse+= w;
1732 if (DataInfo().IsSignal(*e)) trueType = 1;
1734 sumGlobalwfalse+= w*trueType*dtoutput;
1739 err = sumGlobalwfalse/sumGlobalw ;
// regression uses the AdaBoost.R2 loss (linear/quadratic/exponential)
1740 if ( DoRegression() ) {
1742 if (fAdaBoostR2Loss==
"linear"){
1743 err = sumGlobalwfalse/maxDev/sumGlobalw ;
1745 else if (fAdaBoostR2Loss==
"quadratic"){
1746 err = sumGlobalwfalse2/maxDev/maxDev/sumGlobalw ;
1748 else if (fAdaBoostR2Loss==
"exponential"){
1750 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1753 err += w * (1 -
exp (-tmpDev/maxDev)) / sumGlobalw;
1758 Log() <<
kFATAL <<
" you've chosen a Loss type for Adaboost other than linear, quadratic or exponential "
1759 <<
" namely " << fAdaBoostR2Loss <<
"\n"
1760 <<
"and this is not implemented... a typo in the options ??" <<
Endl;
1764 Log() <<
kDEBUG <<
"BDT AdaBoos wrong/all: " << sumGlobalwfalse <<
"/" << sumGlobalw <<
Endl;
1768 std::vector<Double_t> newSumw(sumw.size(),0);
// err >= 0.5 means the tree is no better than guessing — boosting must stop
1771 if (err >= 0.5 && fUseYesNoLeaf) {
1775 Log() <<
kERROR <<
" YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
1776 <<
"boost such a thing... if after 1 step the error rate is == 0.5"
1778 <<
"please check why this happens, maybe too many events per node requested ?"
1782 Log() <<
kERROR <<
" The error rate in the BDT boosting is > 0.5. ("<< err
1783 <<
") That should not happen, please check your code (i.e... the BDT code), I "
1784 <<
" stop boosting here" <<
Endl;
// err < 0 can only come from negative Monte Carlo weights
1788 }
else if (err < 0) {
1789 Log() <<
kERROR <<
" The error rate in the BDT boosting is < 0. That can happen"
1790 <<
" due to improper treatment of negative weights in a Monte Carlo.. (if you have"
1791 <<
" an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
1792 <<
" for the time being I set it to its absolute value.. just to continue.." <<
Endl;
// standard AdaBoost weight: beta * ln((1-err)/err); the second form is the
// variant used on the alternative branch (see full source for the condition)
1796 boostWeight =
TMath::Log((1.-err)/err)*fAdaBoostBeta;
1798 boostWeight =
TMath::Log((1.+err)/(1-err))*fAdaBoostBeta;
1801 Log() <<
kDEBUG <<
"BDT AdaBoos wrong/all: " << sumGlobalwfalse <<
"/" << sumGlobalw <<
" 1-err/err="<<boostWeight<<
" log.."<<
TMath::Log(boostWeight)<<
Endl;
// second pass: scale boost weights of misclassified events
1806 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1808 if (fUseYesNoLeaf||DoRegression()){
1809 if ((!( (dt->
CheckEvent(*e,fUseYesNoLeaf) > fNodePurityLimit ) == DataInfo().IsSignal(*e))) || DoRegression()) {
// negative-weight events are (optionally) boosted inversely
1813 if ( (*e)->GetWeight() > 0 ){
1814 (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
1816 if (DoRegression()) results->
GetHist(
"BoostWeights")->
Fill(boostfactor);
1818 if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);
1819 else (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
1827 if (DataInfo().IsSignal(*e)) trueType = 1;
1831 if ( (*e)->GetWeight() > 0 ){
1832 (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
1834 if (DoRegression()) results->
GetHist(
"BoostWeights")->
Fill(boostfactor);
1836 if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);
1837 else (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
1840 newSumGlobalw+=(*e)->GetWeight();
1841 newSumw[(*e)->GetClass()] += (*e)->GetWeight();
1847 Log() <<
kDEBUG <<
"new Nsig="<<newSumw[0]*globalNormWeight <<
" new Nbkg="<<newSumw[1]*globalNormWeight <<
Endl;
// renormalize so the total event weight is preserved across iterations
1850 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1854 if (DataInfo().IsSignal(*e))(*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );
1855 else (*e)->ScaleBoostWeight( globalNormWeight );
1858 if (!(DoRegression()))results->
GetHist(
"BoostWeights")->
Fill(boostWeight);
1862 fBoostWeight = boostWeight;
1863 fErrorFraction = err;
// --- fragment: AdaCost(eventSample, dt) (orig. ~1891-1991) ---
// Cost-sensitive AdaBoost variant: each (true class, selected class)
// combination carries its own cost Css/Cts_sb/Ctb_ss/Cbb. Regression is
// explicitly not supported (fatal).
1891 Double_t err=0, sumGlobalWeights=0, sumGlobalCost=0;
1893 std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);
1894 std::map<Node*,Int_t> sigEventsInNode;
1896 for (vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1898 sumGlobalWeights += w;
1899 UInt_t iclass=(*e)->GetClass();
1903 if ( DoRegression() ) {
1904 Log() <<
kFATAL <<
" AdaCost not implemented for regression"<<
Endl;
1909 Bool_t isTrueSignal = DataInfo().IsSignal(*e);
1910 Bool_t isSelectedSignal = (dtoutput>0);
1911 if (isTrueSignal) trueType = 1;
// pick the cost matrix entry for this (truth, selection) combination
1915 if (isTrueSignal && isSelectedSignal) cost=Css;
1916 else if (isTrueSignal && !isSelectedSignal) cost=Cts_sb;
1917 else if (!isTrueSignal && isSelectedSignal) cost=Ctb_ss;
1918 else if (!isTrueSignal && !isSelectedSignal) cost=Cbb;
1919 else Log() <<
kERROR <<
"something went wrong in AdaCost" <<
Endl;
1921 sumGlobalCost+= w*trueType*dtoutput*cost;
1926 if ( DoRegression() ) {
1927 Log() <<
kFATAL <<
" AdaCost not implemented for regression"<<
Endl;
1932 sumGlobalCost /= sumGlobalWeights;
1937 vector<Double_t> newSumClassWeights(sumw.size(),0);
// AdaCost boost weight: beta * ln((1+cost)/(1-cost))
1939 Double_t boostWeight =
TMath::Log((1+sumGlobalCost)/(1-sumGlobalCost)) * fAdaBoostBeta;
// second pass: re-weight events according to their individual cost
1943 for (vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1946 Bool_t isTrueSignal = DataInfo().IsSignal(*e);
1947 Bool_t isSelectedSignal = (dtoutput>0);
1948 if (isTrueSignal) trueType = 1;
1952 if (isTrueSignal && isSelectedSignal) cost=Css;
1953 else if (isTrueSignal && !isSelectedSignal) cost=Cts_sb;
1954 else if (!isTrueSignal && isSelectedSignal) cost=Ctb_ss;
1955 else if (!isTrueSignal && !isSelectedSignal) cost=Cbb;
1956 else Log() <<
kERROR <<
"something went wrong in AdaCost" <<
Endl;
1959 if (DoRegression())
Log() <<
kFATAL <<
" AdaCost not implemented for regression"<<
Endl;
1960 if ( (*e)->GetWeight() > 0 ){
1961 (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
1963 if (DoRegression())
Log() <<
kFATAL <<
" AdaCost not implemented for regression"<<
Endl;
1965 if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);
1968 newSumGlobalWeights+=(*e)->GetWeight();
1969 newSumClassWeights[(*e)->GetClass()] += (*e)->GetWeight();
// renormalize the sample back to its original total weight
1974 Double_t globalNormWeight=
Double_t(eventSample.size())/newSumGlobalWeights;
1975 Log() <<
kDEBUG <<
"new Nsig="<<newSumClassWeights[0]*globalNormWeight <<
" new Nbkg="<<newSumClassWeights[1]*globalNormWeight <<
Endl;
1978 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
1981 if (DataInfo().IsSignal(*e))(*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );
1982 else (*e)->ScaleBoostWeight( globalNormWeight );
1986 if (!(DoRegression()))results->
GetHist(
"BoostWeights")->
Fill(boostWeight);
1990 fBoostWeight = boostWeight;
1991 fErrorFraction = err;
// --- fragment: GetBaggedSubSample(eventSample) (orig. ~2019-2023) ---
// Resamples the training set: each event enters the sub-sample a
// Poisson(fBaggedSampleFraction)-distributed number of times.
2019 if (!fSubSample.empty()) fSubSample.clear();
2021 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
2022 n = trandom->
PoissonD(fBaggedSampleFraction);
2023 for (
Int_t i=0;i<
n;i++) fSubSample.push_back(*e);
// --- fragment: AdaBoostR2(eventSample, dt) (orig. ~2058-2165) ---
// AdaBoost.R2 for regression: error is the (loss-weighted) deviation of the
// tree's prediction, normalized by the maximum deviation.
2058 if ( !DoRegression() )
Log() <<
kFATAL <<
"Somehow you chose a regression boost method for a classification job" <<
Endl;
2060 Double_t err=0, sumw=0, sumwfalse=0, sumwfalse2=0;
2062 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
2067 sumwfalse += w * tmpDev;
2068 sumwfalse2 += w * tmpDev*tmpDev;
2069 if (tmpDev > maxDev) maxDev = tmpDev;
// select the loss flavour configured via AdaBoostR2Loss
2073 if (fAdaBoostR2Loss==
"linear"){
2074 err = sumwfalse/maxDev/sumw ;
2076 else if (fAdaBoostR2Loss==
"quadratic"){
2077 err = sumwfalse2/maxDev/maxDev/sumw ;
2079 else if (fAdaBoostR2Loss==
"exponential"){
2081 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
2084 err += w * (1 -
exp (-tmpDev/maxDev)) / sumw;
2089 Log() <<
kFATAL <<
" you've chosen a Loss type for Adaboost other than linear, quadratic or exponential "
2090 <<
" namely " << fAdaBoostR2Loss <<
"\n"
2091 <<
"and this is not implemented... a typo in the options ??" <<
Endl;
// err >= 0.5: the regression tree is too weak to boost — stop here
2099 Log() <<
kERROR <<
" YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
2100 <<
"boost such a thing... if after 1 step the error rate is == 0.5"
2102 <<
"please check why this happens, maybe too many events per node requested ?"
2106 Log() <<
kERROR <<
" The error rate in the BDT boosting is > 0.5. ("<< err
2107 <<
") That should not happen, but is possible for regression trees, and"
2108 <<
" should trigger a stop for the boosting. please check your code (i.e... the BDT code), I "
2109 <<
" stop boosting " <<
Endl;
2113 }
else if (err < 0) {
2114 Log() <<
kERROR <<
" The error rate in the BDT boosting is < 0. That can happen"
2115 <<
" due to improper treatment of negative weights in a Monte Carlo.. (if you have"
2116 <<
" an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
2117 <<
" for the time being I set it to its absolute value.. just to continue.." <<
Endl;
// R2 boost weight: beta_t = err/(1-err)
2121 Double_t boostWeight = err / (1.-err);
2126 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
2128 results->
GetHist(
"BoostWeights")->
Fill(boostfactor);
2130 if ( (*e)->GetWeight() > 0 ){
2131 Float_t newBoostWeight = (*e)->GetBoostWeight() * boostfactor;
2132 Float_t newWeight = (*e)->GetWeight() * (*e)->GetBoostWeight() * boostfactor;
// diagnostic dump when a weight underflows to exactly zero
2133 if (newWeight == 0) {
2134 Log() <<
kINFO <<
"Weight= " << (*e)->GetWeight() <<
Endl;
2135 Log() <<
kINFO <<
"BoostWeight= " << (*e)->GetBoostWeight() <<
Endl;
2136 Log() <<
kINFO <<
"boostweight="<<boostWeight <<
" err= " <<err <<
Endl;
2137 Log() <<
kINFO <<
"NewBoostWeight= " << newBoostWeight <<
Endl;
2138 Log() <<
kINFO <<
"boostfactor= " << boostfactor <<
Endl;
2141 Log() <<
kINFO <<
"target = " << (*e)->GetTarget(0) <<
Endl;
2144 (*e)->SetBoostWeight( newBoostWeight );
2147 (*e)->SetBoostWeight( (*e)->GetBoostWeight() / boostfactor);
2149 newSumw+=(*e)->GetWeight();
// renormalize so the total sample weight is unchanged
2153 Double_t normWeight = sumw / newSumw;
2154 for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();e++) {
2157 (*e)->SetBoostWeight( (*e)->GetBoostWeight() * normWeight );
2164 fBoostWeight = boostWeight;
2165 fErrorFraction = err;
// --- fragment: AddWeightsXMLTo(parent) (orig. ~2177-2195) ---
// Serializes the preselection cuts (one low/high signal/background cut per
// input variable) and every tree of the forest to the weights XML node.
2177 if (fDoPreselection){
2178 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
2179 gTools().
AddAttr( wght,
Form(
"PreselectionLowBkgVar%d",ivar), fIsLowBkgCut[ivar]);
2180 gTools().
AddAttr( wght,
Form(
"PreselectionLowBkgVar%dValue",ivar), fLowBkgCut[ivar]);
2181 gTools().
AddAttr( wght,
Form(
"PreselectionLowSigVar%d",ivar), fIsLowSigCut[ivar]);
2182 gTools().
AddAttr( wght,
Form(
"PreselectionLowSigVar%dValue",ivar), fLowSigCut[ivar]);
2183 gTools().
AddAttr( wght,
Form(
"PreselectionHighBkgVar%d",ivar), fIsHighBkgCut[ivar]);
2184 gTools().
AddAttr( wght,
Form(
"PreselectionHighBkgVar%dValue",ivar),fHighBkgCut[ivar]);
2185 gTools().
AddAttr( wght,
Form(
"PreselectionHighSigVar%d",ivar), fIsHighSigCut[ivar]);
2186 gTools().
AddAttr( wght,
Form(
"PreselectionHighSigVar%dValue",ivar),fHighSigCut[ivar]);
2192 gTools().
AddAttr( wght,
"AnalysisType", fForest.back()->GetAnalysisType() );
2194 for (
UInt_t i=0; i< fForest.size(); i++) {
2195 void* trxml = fForest[i]->AddXMLTo(wght);
// --- fragment: ReadWeightsFromXML(parent) (orig. ~2206-2263) ---
// Clears the existing forest, restores the preselection cut arrays (when the
// corresponding attributes exist), then re-reads every tree + boost weight.
2206 for (i=0; i<fForest.size(); i++)
delete fForest[i];
2208 fBoostWeights.clear();
// presence of the first preselection attribute signals the new file format
2215 if (
gTools().HasAttr( parent,
Form(
"PreselectionLowBkgVar%d",0))) {
2216 fIsLowBkgCut.resize(GetNvar());
2217 fLowBkgCut.resize(GetNvar());
2218 fIsLowSigCut.resize(GetNvar());
2219 fLowSigCut.resize(GetNvar());
2220 fIsHighBkgCut.resize(GetNvar());
2221 fHighBkgCut.resize(GetNvar());
2222 fIsHighSigCut.resize(GetNvar());
2223 fHighSigCut.resize(GetNvar());
2227 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
2229 fIsLowBkgCut[ivar]=tmpBool;
2231 fLowBkgCut[ivar]=tmpDouble;
2233 fIsLowSigCut[ivar]=tmpBool;
2235 fLowSigCut[ivar]=tmpDouble;
2237 fIsHighBkgCut[ivar]=tmpBool;
2239 fHighBkgCut[ivar]=tmpDouble;
2241 fIsHighSigCut[ivar]=tmpBool;
2243 fHighSigCut[ivar]=tmpDouble;
2250 if(
gTools().HasAttr(parent,
"TreeType")) {
2261 fForest.back()->SetTreeID(i++);
2263 fBoostWeights.push_back(boostWeight);
// --- fragment: ReadWeightsFromStream(istr) (orig. ~2275-2300) ---
// Legacy text-format reader: parses tree count, then per tree its index,
// boost weight, and the tree itself; index mismatch is a fatal file error.
2275 Int_t analysisType(0);
2278 istr >> dummy >> fNTrees;
2279 Log() <<
kINFO <<
"Read " << fNTrees <<
" Decision trees" <<
Endl;
2281 for (
UInt_t i=0;i<fForest.size();i++)
delete fForest[i];
2283 fBoostWeights.clear();
2286 for (
int i=0;i<fNTrees;i++) {
2287 istr >> dummy >> iTree >> dummy >> boostWeight;
2289 fForest.back()->Print( std::cout );
2290 Log() <<
kFATAL <<
"Error while reading weight file; mismatch iTree="
2291 << iTree <<
" i=" << i
2292 <<
" dummy " << dummy
2293 <<
" boostweight " << boostWeight
2298 fForest.back()->SetTreeID(i);
2299 fForest.back()->
Read(istr, GetTrainingTMVAVersionCode());
2300 fBoostWeights.push_back(boostWeight);
// --- fragment: GetMvaValue(err, errUpper) (orig. ~2307) ---
// Convenience overload: useNTrees = 0 means "use the whole forest".
2307 return this->GetMvaValue( err, errUpper, 0 );
// --- fragment: GetMvaValue(err, errUpper, useNTrees) (orig. ~2317-2322) ---
// Applies the preselection cuts first (when enabled); otherwise delegates to
// PrivateGetMvaValue for the actual forest evaluation.
2317 const Event* ev = GetEvent();
2318 if (fDoPreselection) {
2319 Double_t val = ApplyPreselectionCuts(ev);
2322 return PrivateGetMvaValue(ev, err, errUpper, useNTrees);
// --- fragment: PrivateGetMvaValue (orig. ~2333-2348) ---
// Boost-weighted average of the tree responses; gradient-boosted forests are
// handled separately by GetGradBoostMVA.
2333 NoErrorCalc(err, errUpper);
2337 UInt_t nTrees = fForest.size();
2339 if (useNTrees > 0 ) nTrees = useNTrees;
2341 if (fBoostType==
"Grad")
return GetGradBoostMVA(ev,nTrees);
2345 for (
UInt_t itree=0; itree<nTrees; itree++) {
2347 myMVA += fBoostWeights[itree] * fForest[itree]->CheckEvent(ev,fUseYesNoLeaf);
2348 norm += fBoostWeights[itree];
// --- fragment: GetMulticlassValues() (orig. ~2360-2383) ---
// Trees are interleaved per class (itree += nClasses); the per-class scores
// are converted to probabilities with a softmax-like normalization.
2360 if (fMulticlassReturnVal ==
NULL) fMulticlassReturnVal =
new std::vector<Float_t>();
2361 fMulticlassReturnVal->clear();
2363 std::vector<double> temp;
2365 UInt_t nClasses = DataInfo().GetNClasses();
2366 for(
UInt_t iClass=0; iClass<nClasses; iClass++){
2367 temp.push_back(0.0);
2368 for(
UInt_t itree = iClass; itree<fForest.size(); itree+=nClasses){
2369 temp[iClass] += fForest[itree]->CheckEvent(e,
kFALSE);
2373 for(
UInt_t iClass=0; iClass<nClasses; iClass++){
2375 for(
UInt_t j=0;j<nClasses;j++){
2377 norm+=
exp(temp[j]-temp[iClass]);
2379 (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
2383 return *fMulticlassReturnVal;
// --- fragment: GetRegressionValues() (orig. ~2395-2471) ---
// Three evaluation modes: AdaBoostR2 (weighted median of tree responses),
// gradient boost (plain sum + constant offset), and default (boost-weighted
// mean). The result is transformed back to the original target scale.
2395 if (fRegressionReturnVal ==
NULL) fRegressionReturnVal =
new std::vector<Float_t>();
2396 fRegressionReturnVal->clear();
2398 const Event * ev = GetEvent();
2403 if (fBoostType==
"AdaBoostR2") {
// weighted median: sort responses, walk until half the total boost weight
2414 vector< Double_t > response(fForest.size());
2415 vector< Double_t > weight(fForest.size());
2418 for (
UInt_t itree=0; itree<fForest.size(); itree++) {
2419 response[itree] = fForest[itree]->CheckEvent(ev,
kFALSE);
2420 weight[itree] = fBoostWeights[itree];
2421 totalSumOfWeights += fBoostWeights[itree];
2424 std::vector< std::vector<Double_t> > vtemp;
2425 vtemp.push_back( response );
2426 vtemp.push_back( weight );
2431 while (sumOfWeights <= totalSumOfWeights/2.) {
2432 sumOfWeights += vtemp[1][t];
2446 else if(fBoostType==
"Grad"){
2447 for (
UInt_t itree=0; itree<fForest.size(); itree++) {
2448 myMVA += fForest[itree]->CheckEvent(ev,
kFALSE);
// fBoostWeights[0] holds the initial weighted-median offset (InitGradBoost)
2451 evT->
SetTarget(0, myMVA+fBoostWeights[0] );
2454 for (
UInt_t itree=0; itree<fForest.size(); itree++) {
2456 myMVA += fBoostWeights[itree] * fForest[itree]->CheckEvent(ev,
kFALSE);
2457 norm += fBoostWeights[itree];
// undo the target transformation before returning to the caller
2465 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
2466 fRegressionReturnVal->push_back( evT2->
GetTarget(0) );
2471 return *fRegressionReturnVal;
// --- fragment: WriteMonitoringHistosToFile() (orig. ~2480-2484) ---
// Persists the training-monitoring ntuple to the method's base directory.
2480 Log() <<
kINFO <<
"Write monitoring histograms to file: " << BaseDir()->GetPath() <<
Endl;
2484 fMonitorNtuple->
Write();
// --- fragment: GetVariableImportance() (orig. ~2495-2513) ---
// Boost-weighted sum of each tree's per-variable importance, compressed with
// a square root and normalized to unit sum.
2495 fVariableImportance.resize(GetNvar());
2496 for (
UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
2497 fVariableImportance[ivar]=0;
2500 for (
UInt_t itree = 0; itree < GetNTrees(); itree++) {
2501 std::vector<Double_t> relativeImportance(fForest[itree]->GetVariableImportance());
2502 for (
UInt_t i=0; i< relativeImportance.size(); i++) {
2503 fVariableImportance[i] += fBoostWeights[itree] * relativeImportance[i];
2507 for (
UInt_t ivar=0; ivar< fVariableImportance.size(); ivar++){
2508 fVariableImportance[ivar] =
TMath::Sqrt(fVariableImportance[ivar]);
2509 sum += fVariableImportance[ivar];
2511 for (
UInt_t ivar=0; ivar< fVariableImportance.size(); ivar++) fVariableImportance[ivar] /= sum;
2513 return fVariableImportance;
// --- fragment: GetVariableImportance(ivar) (orig. ~2523-2525) ---
// Single-variable accessor with bounds check (out-of-range is fatal).
2523 std::vector<Double_t> relativeImportance = this->GetVariableImportance();
2524 if (ivar < (
UInt_t)relativeImportance.size())
return relativeImportance[ivar];
2525 else Log() <<
kFATAL <<
"<GetVariableImportance> ivar = " << ivar <<
" is out of range " <<
Endl;
// --- fragment: CreateRanking() (orig. ~2536-2541) ---
// Builds the variable-importance ranking shown in the TMVA output.
2536 fRanking =
new Ranking( GetName(),
"Variable Importance" );
2537 vector< Double_t> importance(this->GetVariableImportance());
2539 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
2541 fRanking->AddRank(
Rank( GetInputLabel(ivar), importance[ivar] ) );
// --- fragment: GetHelpMessage() (orig. ~2558-2604) ---
// Pure user documentation: prints an explanation of BDTs and tuning advice
// (MinNodeSize, PruneStrength) to the log. No computation; the string
// literals below are user-facing output and must not be reworded casually.
2558 Log() <<
"Boosted Decision Trees are a collection of individual decision" <<
Endl;
2559 Log() <<
"trees which form a multivariate classifier by (weighted) majority " <<
Endl;
2560 Log() <<
"vote of the individual trees. Consecutive decision trees are " <<
Endl;
2561 Log() <<
"trained using the original training data set with re-weighted " <<
Endl;
2562 Log() <<
"events. By default, the AdaBoost method is employed, which gives " <<
Endl;
2563 Log() <<
"events that were misclassified in the previous tree a larger " <<
Endl;
2564 Log() <<
"weight in the training of the following tree." <<
Endl;
2566 Log() <<
"Decision trees are a sequence of binary splits of the data sample" <<
Endl;
2567 Log() <<
"using a single descriminant variable at a time. A test event " <<
Endl;
2568 Log() <<
"ending up after the sequence of left-right splits in a final " <<
Endl;
2569 Log() <<
"(\"leaf\") node is classified as either signal or background" <<
Endl;
2570 Log() <<
"depending on the majority type of training events in that node." <<
Endl;
2574 Log() <<
"By the nature of the binary splits performed on the individual" <<
Endl;
2575 Log() <<
"variables, decision trees do not deal well with linear correlations" <<
Endl;
2576 Log() <<
"between variables (they need to approximate the linear split in" <<
Endl;
2577 Log() <<
"the two dimensional space by a sequence of splits on the two " <<
Endl;
2578 Log() <<
"variables individually). Hence decorrelation could be useful " <<
Endl;
2579 Log() <<
"to optimise the BDT performance." <<
Endl;
2583 Log() <<
"The two most important parameters in the configuration are the " <<
Endl;
2584 Log() <<
"minimal number of events requested by a leaf node as percentage of the " <<
Endl;
2585 Log() <<
" number of training events (option \"MinNodeSize\" replacing the actual number " <<
Endl;
2586 Log() <<
" of events \"nEventsMin\" as given in earlier versions" <<
Endl;
2587 Log() <<
"If this number is too large, detailed features " <<
Endl;
2588 Log() <<
"in the parameter space are hard to be modelled. If it is too small, " <<
Endl;
2589 Log() <<
"the risk to overtrain rises and boosting seems to be less effective" <<
Endl;
2590 Log() <<
" typical values from our current expericience for best performance " <<
Endl;
2591 Log() <<
" are between 0.5(%) and 10(%) " <<
Endl;
2593 Log() <<
"The default minimal number is currently set to " <<
Endl;
2594 Log() <<
" max(20, (N_training_events / N_variables^2 / 10)) " <<
Endl;
2595 Log() <<
"and can be changed by the user." <<
Endl;
2597 Log() <<
"The other crucial parameter, the pruning strength (\"PruneStrength\")," <<
Endl;
2598 Log() <<
"is also related to overtraining. It is a regularisation parameter " <<
Endl;
2599 Log() <<
"that is used when determining after the training which splits " <<
Endl;
2600 Log() <<
"are considered statistically insignificant and are removed. The" <<
Endl;
2601 Log() <<
"user is advised to carefully watch the BDT screen output for" <<
Endl;
2602 Log() <<
"the comparison between efficiencies obtained on the training and" <<
Endl;
2603 Log() <<
"the independent test sample. They should be equal within statistical" <<
Endl;
2604 Log() <<
"errors, in order to minimize statistical fluctuations in different samples." <<
Endl;
// --- fragment: MakeClassSpecific(fout, className) (orig. ~2616-2680) ---
// Code generator: emits a standalone C++ classifier equivalent to the trained
// forest. Every string streamed to fout is GENERATED SOURCE CODE — changing
// it changes the emitted classifier, so the literals must stay byte-exact.
2616 fout <<
" std::vector<"<<nodeName<<
"*> fForest; // i.e. root nodes of decision trees" << std::endl;
2617 fout <<
" std::vector<double> fBoostWeights; // the weights applied in the individual boosts" << std::endl;
2618 fout <<
"};" << std::endl << std::endl;
2619 fout <<
"double " << className <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
2620 fout <<
"{" << std::endl;
2621 fout <<
" double myMVA = 0;" << std::endl;
// emit the preselection cuts as early returns in the generated code
2622 if (fDoPreselection){
2623 for (
UInt_t ivar = 0; ivar< fIsLowBkgCut.size(); ivar++){
2624 if (fIsLowBkgCut[ivar]){
2625 fout <<
" if (inputValues["<<ivar<<
"] < " << fLowBkgCut[ivar] <<
") return -1; // is background preselection cut" << std::endl;
2627 if (fIsLowSigCut[ivar]){
2628 fout <<
" if (inputValues["<<ivar<<
"] < "<< fLowSigCut[ivar] <<
") return 1; // is signal preselection cut" << std::endl;
2630 if (fIsHighBkgCut[ivar]){
2631 fout <<
" if (inputValues["<<ivar<<
"] > "<<fHighBkgCut[ivar] <<
") return -1; // is background preselection cut" << std::endl;
2633 if (fIsHighSigCut[ivar]){
2634 fout <<
" if (inputValues["<<ivar<<
"] > "<<fHighSigCut[ivar]<<
") return 1; // is signal preselection cut" << std::endl;
// non-gradient forests need a normalization term in the generated code
2639 if (fBoostType!=
"Grad"){
2640 fout <<
" double norm = 0;" << std::endl;
// generated loop: walk each tree from root to leaf
2642 fout <<
" for (unsigned int itree=0; itree<fForest.size(); itree++){" << std::endl;
2643 fout <<
" "<<nodeName<<
" *current = fForest[itree];" << std::endl;
2644 fout <<
" while (current->GetNodeType() == 0) { //intermediate node" << std::endl;
2645 fout <<
" if (current->GoesRight(inputValues)) current=("<<nodeName<<
"*)current->GetRight();" << std::endl;
2646 fout <<
" else current=("<<nodeName<<
"*)current->GetLeft();" << std::endl;
2647 fout <<
" }" << std::endl;
2648 if (fBoostType==
"Grad"){
2649 fout <<
" myMVA += current->GetResponse();" << std::endl;
2651 if (fUseYesNoLeaf) fout <<
" myMVA += fBoostWeights[itree] * current->GetNodeType();" << std::endl;
2652 else fout <<
" myMVA += fBoostWeights[itree] * current->GetPurity();" << std::endl;
2653 fout <<
" norm += fBoostWeights[itree];" << std::endl;
2655 fout <<
" }" << std::endl;
2656 if (fBoostType==
"Grad"){
2657 fout <<
" return 2.0/(1.0+exp(-2.0*myMVA))-1.0;" << std::endl;
2659 else fout <<
" return myMVA /= norm;" << std::endl;
2660 fout <<
"};" << std::endl << std::endl;
// generated Initialize(): hard-codes every boost weight and tree structure
2661 fout <<
"void " << className <<
"::Initialize()" << std::endl;
2662 fout <<
"{" << std::endl;
2664 for (
UInt_t itree=0; itree<GetNTrees(); itree++) {
2665 fout <<
" // itree = " << itree << std::endl;
2666 fout <<
" fBoostWeights.push_back(" << fBoostWeights[itree] <<
");" << std::endl;
2667 fout <<
" fForest.push_back( " << std::endl;
2668 this->MakeClassInstantiateNode((
DecisionTreeNode*)fForest[itree]->GetRoot(), fout, className);
2669 fout <<
" );" << std::endl;
2671 fout <<
" return;" << std::endl;
2672 fout <<
"};" << std::endl;
2673 fout <<
" " << std::endl;
2674 fout <<
"// Clean up" << std::endl;
2675 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
2676 fout <<
"{" << std::endl;
2677 fout <<
" for (unsigned int itree=0; itree<fForest.size(); itree++) { " << std::endl;
2678 fout <<
" delete fForest[itree]; " << std::endl;
2679 fout <<
" }" << std::endl;
2680 fout <<
"}" << std::endl;
2692 fout <<
"#define NN new "<<nodeName << std::endl;
2694 fout <<
" " << std::endl;
2695 fout <<
"#ifndef "<<nodeName<<
"__def" << std::endl;
2696 fout <<
"#define "<<nodeName<<
"__def" << std::endl;
2697 fout <<
" " << std::endl;
2698 fout <<
"class "<<nodeName<<
" {" << std::endl;
2699 fout <<
" " << std::endl;
2700 fout <<
"public:" << std::endl;
2701 fout <<
" " << std::endl;
2702 fout <<
" // constructor of an essentially \"empty\" node floating in space" << std::endl;
2703 fout <<
" "<<nodeName<<
" ( "<<nodeName<<
"* left,"<<nodeName<<
"* right," << std::endl;
2704 if (fUseFisherCuts){
2705 fout <<
" int nFisherCoeff," << std::endl;
2706 for (
UInt_t i=0;i<GetNVariables()+1;i++){
2707 fout <<
" double fisherCoeff"<<i<<
"," << std::endl;
2710 fout <<
" int selector, double cutValue, bool cutType, " << std::endl;
2711 fout <<
" int nodeType, double purity, double response ) :" << std::endl;
2712 fout <<
" fLeft ( left )," << std::endl;
2713 fout <<
" fRight ( right )," << std::endl;
2714 if (fUseFisherCuts) fout <<
" fNFisherCoeff ( nFisherCoeff )," << std::endl;
2715 fout <<
" fSelector ( selector )," << std::endl;
2716 fout <<
" fCutValue ( cutValue )," << std::endl;
2717 fout <<
" fCutType ( cutType )," << std::endl;
2718 fout <<
" fNodeType ( nodeType )," << std::endl;
2719 fout <<
" fPurity ( purity )," << std::endl;
2720 fout <<
" fResponse ( response ){" << std::endl;
2721 if (fUseFisherCuts){
2722 for (
UInt_t i=0;i<GetNVariables()+1;i++){
2723 fout <<
" fFisherCoeff.push_back(fisherCoeff"<<i<<
");" << std::endl;
2726 fout <<
" }" << std::endl << std::endl;
2727 fout <<
" virtual ~"<<nodeName<<
"();" << std::endl << std::endl;
2728 fout <<
" // test event if it decends the tree at this node to the right" << std::endl;
2729 fout <<
" virtual bool GoesRight( const std::vector<double>& inputValues ) const;" << std::endl;
2730 fout <<
" "<<nodeName<<
"* GetRight( void ) {return fRight; };" << std::endl << std::endl;
2731 fout <<
" // test event if it decends the tree at this node to the left " << std::endl;
2732 fout <<
" virtual bool GoesLeft ( const std::vector<double>& inputValues ) const;" << std::endl;
2733 fout <<
" "<<nodeName<<
"* GetLeft( void ) { return fLeft; }; " << std::endl << std::endl;
2734 fout <<
" // return S/(S+B) (purity) at this node (from training)" << std::endl << std::endl;
2735 fout <<
" double GetPurity( void ) const { return fPurity; } " << std::endl;
2736 fout <<
" // return the node type" << std::endl;
2737 fout <<
" int GetNodeType( void ) const { return fNodeType; }" << std::endl;
2738 fout <<
" double GetResponse(void) const {return fResponse;}" << std::endl << std::endl;
2739 fout <<
"private:" << std::endl << std::endl;
2740 fout <<
" "<<nodeName<<
"* fLeft; // pointer to the left daughter node" << std::endl;
2741 fout <<
" "<<nodeName<<
"* fRight; // pointer to the right daughter node" << std::endl;
2742 if (fUseFisherCuts){
2743 fout <<
" int fNFisherCoeff; // =0 if this node doesn use fisher, else =nvar+1 " << std::endl;
2744 fout <<
" std::vector<double> fFisherCoeff; // the fisher coeff (offset at the last element)" << std::endl;
2746 fout <<
" int fSelector; // index of variable used in node selection (decision tree) " << std::endl;
2747 fout <<
" double fCutValue; // cut value appplied on this node to discriminate bkg against sig" << std::endl;
2748 fout <<
" bool fCutType; // true: if event variable > cutValue ==> signal , false otherwise" << std::endl;
2749 fout <<
" int fNodeType; // Type of node: -1 == Bkg-leaf, 1 == Signal-leaf, 0 = internal " << std::endl;
2750 fout <<
" double fPurity; // Purity of node from training"<< std::endl;
2751 fout <<
" double fResponse; // Regression response value of node" << std::endl;
2752 fout <<
"}; " << std::endl;
2753 fout <<
" " << std::endl;
2754 fout <<
"//_______________________________________________________________________" << std::endl;
2755 fout <<
" "<<nodeName<<
"::~"<<nodeName<<
"()" << std::endl;
2756 fout <<
"{" << std::endl;
2757 fout <<
" if (fLeft != NULL) delete fLeft;" << std::endl;
2758 fout <<
" if (fRight != NULL) delete fRight;" << std::endl;
2759 fout <<
"}; " << std::endl;
2760 fout <<
" " << std::endl;
2761 fout <<
"//_______________________________________________________________________" << std::endl;
2762 fout <<
"bool "<<nodeName<<
"::GoesRight( const std::vector<double>& inputValues ) const" << std::endl;
2763 fout <<
"{" << std::endl;
2764 fout <<
" // test event if it decends the tree at this node to the right" << std::endl;
2765 fout <<
" bool result;" << std::endl;
2766 if (fUseFisherCuts){
2767 fout <<
" if (fNFisherCoeff == 0){" << std::endl;
2768 fout <<
" result = (inputValues[fSelector] > fCutValue );" << std::endl;
2769 fout <<
" }else{" << std::endl;
2770 fout <<
" double fisher = fFisherCoeff.at(fFisherCoeff.size()-1);" << std::endl;
2771 fout <<
" for (unsigned int ivar=0; ivar<fFisherCoeff.size()-1; ivar++)" << std::endl;
2772 fout <<
" fisher += fFisherCoeff.at(ivar)*inputValues.at(ivar);" << std::endl;
2773 fout <<
" result = fisher > fCutValue;" << std::endl;
2774 fout <<
" }" << std::endl;
2776 fout <<
" result = (inputValues[fSelector] > fCutValue );" << std::endl;
2778 fout <<
" if (fCutType == true) return result; //the cuts are selecting Signal ;" << std::endl;
2779 fout <<
" else return !result;" << std::endl;
2780 fout <<
"}" << std::endl;
2781 fout <<
" " << std::endl;
2782 fout <<
"//_______________________________________________________________________" << std::endl;
2783 fout <<
"bool "<<nodeName<<
"::GoesLeft( const std::vector<double>& inputValues ) const" << std::endl;
2784 fout <<
"{" << std::endl;
2785 fout <<
" // test event if it decends the tree at this node to the left" << std::endl;
2786 fout <<
" if (!this->GoesRight(inputValues)) return true;" << std::endl;
2787 fout <<
" else return false;" << std::endl;
2788 fout <<
"}" << std::endl;
2789 fout <<
" " << std::endl;
2790 fout <<
"#endif" << std::endl;
2791 fout <<
" " << std::endl;
2800 Log() <<
kFATAL <<
"MakeClassInstantiateNode: started with undefined node" <<
Endl;
2803 fout <<
"NN("<<std::endl;
2810 fout <<
", " <<std::endl;
2817 fout <<
", " << std::endl
2818 << std::setprecision(6);
2819 if (fUseFisherCuts){
2821 for (
UInt_t i=0; i< GetNVariables()+1; i++) {
2846 Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;
2848 std::vector<TMVA::BDTEventWrapper> bdtEventSample;
2850 fIsLowSigCut.assign(GetNvar(),
kFALSE);
2851 fIsLowBkgCut.assign(GetNvar(),
kFALSE);
2852 fIsHighSigCut.assign(GetNvar(),
kFALSE);
2853 fIsHighBkgCut.assign(GetNvar(),
kFALSE);
2855 fLowSigCut.assign(GetNvar(),0.);
2856 fLowBkgCut.assign(GetNvar(),0.);
2857 fHighSigCut.assign(GetNvar(),0.);
2858 fHighBkgCut.assign(GetNvar(),0.);
2863 for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {
2864 if (DataInfo().IsSignal(*it)){
2865 nTotS += (*it)->GetWeight();
2869 nTotB += (*it)->GetWeight();
2875 for(
UInt_t ivar = 0; ivar < GetNvar(); ivar++ ) {
2877 std::sort( bdtEventSample.begin(),bdtEventSample.end() );
2879 Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;
2880 std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
2881 for( ; it != it_end; ++it ) {
2882 if (DataInfo().IsSignal(**it))
2883 sigWeightCtr += (**it)->GetWeight();
2885 bkgWeightCtr += (**it)->GetWeight();
2887 it->SetCumulativeWeight(
false,bkgWeightCtr);
2888 it->SetCumulativeWeight(
true,sigWeightCtr);
2893 Double_t dVal = (DataInfo().GetVariableInfo(ivar).GetMax() - DataInfo().GetVariableInfo(ivar).GetMin())/100. ;
2894 Double_t nSelS, nSelB, effS=0.05, effB=0.05, rejS=0.05, rejB=0.05;
2895 Double_t tmpEffS, tmpEffB, tmpRejS, tmpRejB;
2900 for(
UInt_t iev = 1; iev < bdtEventSample.size(); iev++) {
2903 nSelS = bdtEventSample[iev].GetCumulativeWeight(
true);
2904 nSelB = bdtEventSample[iev].GetCumulativeWeight(
false);
2906 tmpEffS=nSelS/nTotS;
2907 tmpEffB=nSelB/nTotB;
2910 if (nSelS==0 && tmpEffB>effB) {effB=tmpEffB; fLowBkgCut[ivar] = bdtEventSample[iev].GetVal() - dVal; fIsLowBkgCut[ivar]=
kTRUE;}
2911 else if (nSelB==0 && tmpEffS>effS) {effS=tmpEffS; fLowSigCut[ivar] = bdtEventSample[iev].GetVal() - dVal; fIsLowSigCut[ivar]=
kTRUE;}
2912 else if (nSelB==nTotB && tmpRejS>rejS) {rejS=tmpRejS; fHighSigCut[ivar] = bdtEventSample[iev].GetVal() + dVal; fIsHighSigCut[ivar]=
kTRUE;}
2913 else if (nSelS==nTotS && tmpRejB>rejB) {rejB=tmpRejB; fHighBkgCut[ivar] = bdtEventSample[iev].GetVal() + dVal; fIsHighBkgCut[ivar]=
kTRUE;}
2918 Log() <<
kINFO <<
" found and suggest the following possible pre-selection cuts " <<
Endl;
2919 if (fDoPreselection)
Log() <<
kINFO <<
"the training will be done after these cuts... and GetMVA value returns +1, (-1) for a signal (bkg) event that passes these cuts" <<
Endl;
2920 else Log() <<
kINFO <<
"as option DoPreselection was not used, these cuts however will not be performed, but the training will see the full sample"<<
Endl;
2921 for (
UInt_t ivar=0; ivar < GetNvar(); ivar++ ) {
2922 if (fIsLowBkgCut[ivar]){
2923 Log() <<
kINFO <<
" found cut: Bkg if var " << ivar <<
" < " << fLowBkgCut[ivar] <<
Endl;
2925 if (fIsLowSigCut[ivar]){
2926 Log() <<
kINFO <<
" found cut: Sig if var " << ivar <<
" < " << fLowSigCut[ivar] <<
Endl;
2928 if (fIsHighBkgCut[ivar]){
2929 Log() <<
kINFO <<
" found cut: Bkg if var " << ivar <<
" > " << fHighBkgCut[ivar] <<
Endl;
2931 if (fIsHighSigCut[ivar]){
2932 Log() <<
kINFO <<
" found cut: Sig if var " << ivar <<
" > " << fHighSigCut[ivar] <<
Endl;
2947 for (
UInt_t ivar=0; ivar < GetNvar(); ivar++ ) {
2948 if (fIsLowBkgCut[ivar]){
2949 if (ev->
GetValue(ivar) < fLowBkgCut[ivar]) result = -1;
2951 if (fIsLowSigCut[ivar]){
2952 if (ev->
GetValue(ivar) < fLowSigCut[ivar]) result = 1;
2954 if (fIsHighBkgCut[ivar]){
2955 if (ev->
GetValue(ivar) > fHighBkgCut[ivar]) result = -1;
2957 if (fIsHighSigCut[ivar]){
2958 if (ev->
GetValue(ivar) > fHighSigCut[ivar]) result = 1;
Double_t AdaCost(std::vector< const TMVA::Event * > &, DecisionTree *dt)
the AdaCost boosting algorithm takes a simple cost Matrix (currently fixed for all events...
void Train(void)
BDT training.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
virtual const char * GetTitle() const
Returns title of object.
void PreProcessNegativeEventWeights()
o.k.
Double_t AdaBoostR2(std::vector< const TMVA::Event * > &, DecisionTree *dt)
adaption of the AdaBoost to regression problems (see H.Drucker 1997)
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
double dist(Rotation3D const &r1, Rotation3D const &r2)
Random number generator class based on M.
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
MsgLogger & Endl(MsgLogger &ml)
Double_t Boost(std::vector< const TMVA::Event * > &, DecisionTree *dt, UInt_t cls=0)
apply the boosting algorithm (the algorithm is selected via the "option" given in the constructor...
TH1 * GetHist(const TString &alias) const
void WriteMonitoringHistosToFile(void) const
Here we could write some histograms created during the processing to the output file.
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
void AddWeightsXMLTo(void *parent) const
write weights to XML
Double_t GradBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt, UInt_t cls=0)
Calculate the desired response value for each region.
const Ranking * CreateRanking()
Compute ranking of input variables.
void MakeClassSpecificHeader(std::ostream &, const TString &) const
specific class header
TString & ReplaceAll(const TString &s1, const TString &s2)
virtual void SetName(const char *name)
Change (i.e.
void DeclareOptions()
define the options (their key words) that can be set in the option string know options: nTrees number...
Double_t Atof() const
Return floating-point value contained in string.
UInt_t GetNFisherCoeff() const
virtual DecisionTreeNode * GetRight() const
TMVA::DecisionTreeNode * GetEventNode(const TMVA::Event &e) const
get the pointer to the leaf node where a particular event ends up in...
Double_t Bagging()
call it boot-strapping, re-sampling or whatever you like, in the end it is nothing else but applying ...
1-D histogram with a float per channel (see TH1 documentation)}
Short_t Min(Short_t a, Short_t b)
Int_t GetNodeType(void) const
virtual void SetYTitle(const char *title)
virtual void SetTitle(const char *title="")
Set graph title.
void DeterminePreselectionCuts(const std::vector< const TMVA::Event * > &eventSample)
find useful preselection cuts that will be applied before the Decision Tree training.
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
void UpdateTargetsRegression(std::vector< const TMVA::Event * > &, Bool_t first=kFALSE)
Calculate current residuals for all events and update targets for next iteration. ...
Int_t FloorNint(Double_t x)
Double_t GetWeightedQuantile(std::vector< std::pair< Double_t, Double_t > > vec, const Double_t quantile, const Double_t SumOfWeights=0.0)
calculates the quantile of the distribution of the first pair entries weighted with the values in the...
virtual DecisionTreeNode * GetLeft() const
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
void GetHelpMessage() const
Get help message text.
Double_t GetGradBoostMVA(const TMVA::Event *e, UInt_t nTrees)
returns MVA value: -1 for background, 1 for signal
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument
virtual Double_t Determinant() const
Float_t GetPurity(void) const
Bool_t GetCutType(void) const
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
BDT can handle classification with multiple classes and regression with one regression-target.
void Reset(void)
reset the method, as if it had just been instantiated (forget all training etc.)
TString & Append(const char *cs)
std::vector< std::vector< double > > Data
Double_t RegBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt)
a special boosting only for Regression ...
void SetMinNodeSize(Double_t sizeInPercent)
virtual Int_t Read(const char *name)
Read contents of object with specified name from the current directory.
void MakeClassInstantiateNode(DecisionTreeNode *n, std::ostream &fout, const TString &className) const
recursively descends a tree and writes the node instance to the output stream
const std::vector< Float_t > & GetMulticlassValues()
get the multiclass MVA response for the BDT classifier
Double_t GradBoostRegression(std::vector< const TMVA::Event * > &, DecisionTree *dt)
Implementation of M_TreeBoost using a Huber loss function as described by Friedman 1999...
void InitGradBoost(std::vector< const TMVA::Event * > &)
initialize targets for first tree
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
TString GetElapsedTime(Bool_t Scientific=kTRUE)
const std::vector< Float_t > & GetRegressionValues()
get the regression value generated by the BDTs
void InitEventSample()
initialize the event sample (i.e. reset the boost-weights... etc)
virtual void Delete(Option_t *option="")
Delete this object.
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
std::string GetMethodName(TCppMethod_t)
Service class for 2-Dim histogram classes.
std::map< TString, Double_t > optimize()
void BoostMonitor(Int_t iTree)
fills the ROCIntegral vs Itree from the testSample for the monitoring plots during the training ...
Double_t GetFisherCoeff(Int_t ivar) const
virtual ~MethodBDT(void)
destructor. Note: fEventSample and ValidationSample are already deleted at the end of TRAIN. When they ...
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Double_t PrivateGetMvaValue(const TMVA::Event *ev, Double_t *err=0, Double_t *errUpper=0, UInt_t useNTrees=0)
Return the MVA value (range [-1;1]) that classifies the event according to the majority vote from the...
void BDT(const TString &fin="TMVA.root")
char * Form(const char *fmt,...)
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Double_t GetWeight(Double_t x) const
SeparationBase * fSepType
void Init(void)
common initialisation with defaults for the BDT-Method
void ReadWeightsFromXML(void *parent)
reads the BDT from the xml file
Double_t AdaBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt)
the AdaBoost implementation.
Double_t TestTreeQuality(DecisionTree *dt)
test the tree quality.. in terms of misclassification
static void SetVarIndex(Int_t iVar)
TGraph * GetGraph(const TString &alias) const
void Print(std::ostream &os, const OptionType &opt)
void ReadWeightsFromStream(std::istream &istr)
read the weights (BDT coefficients)
Double_t ApplyPreselectionCuts(const Event *ev)
apply the preselection cuts before even bothering about any Decision Trees in the GetMVA ...
void UpdateTargets(std::vector< const TMVA::Event * > &, UInt_t cls=0)
Calculate residua for all events;.
Describe directory structure in memory.
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML
static RooMathCoreReg dummy
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
TMatrixTSym< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant Notice that the LU decomposition is used instead of B...
Float_t GetTarget(UInt_t itgt) const
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Short_t GetSelector() const
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
void GetBaggedSubSample(std::vector< const TMVA::Event * > &)
fills fEventSample with fBaggedSampleFraction*NEvents random training events
virtual void SetXTitle(const char *title)
virtual void SetPoint(Int_t i, Double_t x, Double_t y)
Set x and y values for point number i.
MethodBDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="", TDirectory *theTargetDir=0)
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Short_t Max(Short_t a, Short_t b)
A Graph is a graphics object made of two arrays X and Y with npoints each.
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
A TTree object has a header with a name and a title.
void Store(TObject *obj, const char *alias=0)
Double_t Sqrt(Double_t x)
virtual void Set(Int_t n)
Set number of points in the graph Existing coordinates are preserved New coordinates above fNpoints a...
double norm(double *x, double *p)
Float_t GetResponse(void) const
void MakeClassSpecific(std::ostream &, const TString &) const
make ROOT-independent C++ class for classifier response (classifier-specific implementation) ...
Int_t CeilNint(Double_t x)
Float_t GetCutValue(void) const