Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
SeparationBase.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : SeparationBase *
8 * *
9 * *
10 * Description: An interface to different separation criteria used in various *
11 * training algorithms, as there are: *
12 * *
13 * There are two things: the Separation Index, and the Separation Gain *
14 * Separation Index: *
15 * Measure of the "purity" of a sample. If all elements (events) in the *
16 * sample belong to the same class (e.g. signal or background), then the *
17 * separation index is 0 (meaning 100% purity, or 0% purity, as the index *
18 * is symmetric). The index becomes maximal for perfectly mixed samples, *
19 * e.g. purity = 50%, N_signal = N_bkg. *
20 * *
21 * Separation Gain: *
22 * the measure of how the quality of separation of the sample increases *
23 * by splitting the sample e.g. into a "left-node" and a "right-node" *
24 * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) *
25 * This is then the quality criterion that is optimized when trying *
26 * to increase the information in the system (making the best selection). *
27 * *
28 * Authors (alphabetical): *
29 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
30 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
31 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
32 * *
33 * Copyright (c) 2005: *
34 * CERN, Switzerland *
35 * U. of Victoria, Canada *
36 * Heidelberg U., Germany *
37 * *
38 * Redistribution and use in source and binary forms, with or without *
39 * modification, are permitted according to the terms listed in LICENSE *
40 * (see tmva/doc/LICENSE) *
41 **********************************************************************************/
42
43/*! \class TMVA::SeparationBase
44\ingroup TMVA
45An interface to calculate the "SeparationGain" for different
46separation criteria used in various training algorithms
47
48There are two things: the Separation Index, and the Separation Gain
49Separation Index:
50Measure of the "purity" of a sample. If all elements (events) in the
51sample belong to the same class (e.g. signal or background), then the
52separation index is 0 (meaning 100% purity, or 0% purity, as the index
53is symmetric). The index becomes maximal for perfectly mixed samples,
54e.g. purity = 50%, N_signal = N_bkg.
55
56Separation Gain:
57the measure of how the quality of separation of the sample increases
58by splitting the sample e.g. into a "left-node" and a "right-node"
59(N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
60This is then the quality criterion that is optimized when trying
61to increase the information in the system (making the best selection).
62*/
63#include "TMVA/SeparationBase.h"
64
65#include "TMath.h"
66#include "TString.h"
67
68#include <iostream>
69#include <limits>
70
71
72////////////////////////////////////////////////////////////////////////////////
73/// Constructor.
74
76fName(""),
77 fPrecisionCut(TMath::Sqrt(std::numeric_limits<double>::epsilon()))
78{
79 // default constructor
80}
81
82////////////////////////////////////////////////////////////////////////////////
83/// Copy constructor.
84
86 fName(s.fName),
87 fPrecisionCut(TMath::Sqrt(std::numeric_limits<double>::epsilon()))
88{
89 // copy constructor
90}
91
92////////////////////////////////////////////////////////////////////////////////
93/// Separation Gain:
94/// the measure of how the quality of separation of the sample increases
95/// by splitting the sample e.g. into a "left-node" and a "right-node"
96/// (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
97/// this is then the quality criterion which is optimized for when trying
98/// to increase the information in the system (making the best selection
99
101 const Double_t nTotS, const Double_t nTotB)
102{
103 if ( (nTotS-nSelS)==nSelS && (nTotB-nSelB)==nSelB) return 0.;
104
105 // Double_t parentIndex = (nTotS+nTotB) *this->GetSeparationIndex(nTotS,nTotB);
106
107 // Double_t leftIndex = ( ((nTotS - nSelS) + (nTotB - nSelB))
108 // * this->GetSeparationIndex(nTotS-nSelS,nTotB-nSelB) );
109 // Double_t rightIndex = (nSelS+nSelB) * this->GetSeparationIndex(nSelS,nSelB);
110
111
112 Double_t parentIndex = this->GetSeparationIndex(nTotS,nTotB);
113
114 Double_t leftIndex = ( ((nTotS - nSelS) + (nTotB - nSelB))/(nTotS+nTotB)
115 * this->GetSeparationIndex(nTotS-nSelS,nTotB-nSelB) );
116 Double_t rightIndex = (nSelS+nSelB)/(nTotS+nTotB) * this->GetSeparationIndex(nSelS,nSelB);
117
119 //Double_t diff = (parentIndex - leftIndex - rightIndex)/(nTotS+nTotB);
120
121 if(diff<fPrecisionCut ) {
122 // std::cout << " Warning value in GetSeparation is below numerical precision "
123 // << diff/parentIndex
124 // << std::endl;
125 return 0;
126 }
127
128 return diff;
129}
130
131
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
SeparationBase()
Constructor.
TMath.
Definition TMathBase.h:35