Logo ROOT   6.10/09
Reference Guide
RegressionVariance.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : RegressionVariance *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: Calculate the separation criteria used in regression *
11  * *
12  * There are two things: the Separation Index, and the Separation Gain *
13  * Separation Index: *
14  * Measure of the "Variance" of a sample. *
15  * *
16  * Separation Gain: *
17  * the measure of how the quality of separation of the sample increases *
18  * by splitting the sample e.g. into a "left-node" and a "right-node" *
19  * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) *
20  * this is then the quality criterion which is optimized for when trying *
21  * to increase the information in the system (making the best selection) *
22  * *
23  * *
24  * Authors (alphabetical): *
25  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
26  * *
27  * Copyright (c) 2005: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * Heidelberg U., Germany *
31  * *
32  * Redistribution and use in source and binary forms, with or without *
33  * modification, are permitted according to the terms listed in LICENSE *
34  * (http://tmva.sourceforge.net/LICENSE) *
35  **********************************************************************************/
36 #include <iostream>
37 #include "TMath.h"
39 
41 
42 /*! \class TMVA::RegressionVariance
43 \ingroup TMVA
44 Calculate the "SeparationGain" for Regression analysis
45 separation criteria used in various training algorithms
46 
47 There are two things: the Separation Index, and the Separation Gain
48 Separation Index:
49 Measure of the "Variance" of a sample.
50 
51 Separation Gain:
52 the measure of how the quality of separation of the sample increases
53 by splitting the sample e.g. into a "left-node" and a "right-node"
54 (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
55 this is then the quality criterion which is optimized for when trying
56 to increase the information in the system (making the best selection)
57 */
58 
59 ////////////////////////////////////////////////////////////////////////////////
60 /// Separation Gain:
61 /// the measure of how the quality of separation of the sample increases
62 /// by splitting the sample e.g. into a "left-node" and a "right-node"
63 /// (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
64 /// this is then the quality criteria which is optimized for when trying
65 /// to increase the information in the system
66 /// for the Regression: as the "Gain is maximised", the RMS (sqrt(variance))
67 /// which is used as a "separation" index should be as small as possible.
68 /// the "figure of merit" here has to be -(rms left+rms-right) or 1/rms...
69 
70 Double_t TMVA::RegressionVariance::GetSeparationGain(const Double_t nLeft,
71  const Double_t targetLeft, const Double_t target2Left,
72  const Double_t nTot,
73  const Double_t targetTot, const Double_t target2Tot)
74 {
75 
76  if ( nTot==nLeft || nLeft==0 ) return 0.;
77 
78  Double_t parentIndex = nTot * this->GetSeparationIndex(nTot,targetTot,target2Tot);
79  Double_t leftIndex = ( (nTot - nLeft) * this->GetSeparationIndex(nTot-nLeft,targetTot-targetLeft,target2Tot-target2Left) );
80  Double_t rightIndex = nLeft * this->GetSeparationIndex(nLeft,targetLeft,target2Left);
81 
82  // return 1/ (leftIndex + rightIndex);
83  return (parentIndex - leftIndex - rightIndex)/(parentIndex);
84 }
85 
86 ////////////////////////////////////////////////////////////////////////////////
87 /// Separation Index: a simple Variance
88 
90  const Double_t target, const Double_t target2)
91 {
92  // return TMath::Sqrt(( target2 - target*target/n) / n);
93  return ( target2 - target*target/n) / n;
94 
95 }
96 
97 
98 
Calculate the "SeparationGain" for Regression analysis separation criteria used in various training a...
virtual Double_t GetSeparationIndex(const Double_t n, const Double_t target, const Double_t target2)
Separation Index: a simple Variance.
#define ClassImp(name)
Definition: Rtypes.h:336
double Double_t
Definition: RtypesCore.h:55
Abstract ClassifierFactory template that handles arbitrary types.
const Int_t n
Definition: legend1.C:16