Logo ROOT  
Reference Guide
RegressionVariance.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RegressionVariance *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: Calculate the separation criteria used in regression *
11 * *
12 * There are two things: the Separation Index, and the Separation Gain *
13 * Separation Index: *
14 * Measure of the "Variance" of a sample. *
15 * *
16 * Separation Gain: *
17 * the measure of how the quality of separation of the sample increases *
18 * by splitting the sample e.g. into a "left-node" and a "right-node" *
19 * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) *
20 * this is then the quality criterion which is optimized when trying *
21 * to increase the information in the system (making the best selection) *
22 * *
23 * *
24 * Authors (alphabetical): *
25 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
26 * *
27 * Copyright (c) 2005: *
28 * CERN, Switzerland *
29 * U. of Victoria, Canada *
30 * Heidelberg U., Germany *
31 * *
32 * Redistribution and use in source and binary forms, with or without *
33 * modification, are permitted according to the terms listed in LICENSE *
34 * (http://tmva.sourceforge.net/LICENSE) *
35 **********************************************************************************/
36#include "TMath.h"
38
40
41/*! \class TMVA::RegressionVariance
42\ingroup TMVA
43Calculate the "SeparationGain" for regression analysis:
44the separation criterion used in various training algorithms.
45
46There are two things: the Separation Index, and the Separation Gain
47Separation Index:
48Measure of the "Variance" of a sample.
49
50Separation Gain:
51the measure of how the quality of separation of the sample increases
52by splitting the sample e.g. into a "left-node" and a "right-node"
53(N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
54this is then the quality criterion which is optimized when trying
55to increase the information in the system (making the best selection)
56*/
57
58////////////////////////////////////////////////////////////////////////////////
59/// Separation Gain:
60/// the measure of how the quality of separation of the sample increases
61/// by splitting the sample e.g. into a "left-node" and a "right-node"
62/// (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
63/// this is then the quality criterion which is optimized when trying
64/// to increase the information in the system.
65/// For the regression: as the "gain is maximised", the RMS (sqrt(variance))
66/// which is used as a "separation" index should be as small as possible.
67/// The "figure of merit" here has to be -(rms-left + rms-right) or 1/rms...
68
70 const Double_t targetLeft, const Double_t target2Left,
71 const Double_t nTot,
72 const Double_t targetTot, const Double_t target2Tot)
73{
// Returns the relative gain (parentIndex - leftIndex - rightIndex) / parentIndex,
// where each "index" is a sample's entry count multiplied by the value of
// GetSeparationIndex() for that sample.
// Presumably `target*` are sums of the target values and `target2*` sums of
// their squares (that is how GetSeparationIndex combines them) - confirm
// against the callers.
74
// A split that leaves either side empty (everything on one side, or nothing
// on it) yields no gain.
75 if ( nTot==nLeft || nLeft==0 ) return 0.;
76
// NOTE(review): `leftIndex` is built from the complement sample
// (nTot - nLeft, totals minus the *Left sums) while `rightIndex` is built
// from the nLeft sample, so the local names look swapped. The result is
// unaffected because both terms are simply subtracted from parentIndex.
77 Double_t parentIndex = nTot * this->GetSeparationIndex(nTot,targetTot,target2Tot);
78 Double_t leftIndex = ( (nTot - nLeft) * this->GetSeparationIndex(nTot-nLeft,targetTot-targetLeft,target2Tot-target2Left) );
79 Double_t rightIndex = nLeft * this->GetSeparationIndex(nLeft,targetLeft,target2Left);
80
// NOTE(review): the gain is normalised by parentIndex and there is no guard
// against parentIndex == 0; in that case the division yields NaN or inf.
// Confirm that callers tolerate this.
81 // return 1/ (leftIndex + rightIndex);
82 return (parentIndex - leftIndex - rightIndex)/(parentIndex);
83}
84
85////////////////////////////////////////////////////////////////////////////////
86/// Separation Index: a simple Variance
87
89 const Double_t target, const Double_t target2)
90{
// Computes (target2 - target*target/n) / n.
// If `target` is the sum of the target values and `target2` the sum of their
// squares over `n` entries (presumably - confirm against the callers), this
// is the variance <t^2> - <t>^2 of the sample.
// There is no guard for n == 0; the division is performed unconditionally.
// The commented-out line below would return the square root (RMS) instead.
91 // return TMath::Sqrt(( target2 - target*target/n) / n);
92 return ( target2 - target*target/n) / n;
93
94}
95
96
97
double Double_t
Definition: RtypesCore.h:59
#define ClassImp(name)
Definition: Rtypes.h:364
Calculate the "SeparationGain" for Regression analysis separation criteria used in various training a...
virtual Double_t GetSeparationIndex(const Double_t n, const Double_t target, const Double_t target2)
Separation Index: a simple Variance.
Double_t GetSeparationGain(const Double_t nLeft, const Double_t targetLeft, const Double_t target2Left, const Double_t nTot, const Double_t targetTot, const Double_t target2Tot)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
const Int_t n
Definition: legend1.C:16