 ROOT   Reference Guide TRobustEstimator.h
Go to the documentation of this file.
1// @(#)root/physics:$Id$
2// Author: Anna Kreshuk 08/10/2004
3
4
5//////////////////////////////////////////////////////////////////////////////
6//
7// TRobustEstimator
8//
9// Minimum Covariance Determinant Estimator - a Fast Algorithm
10// invented by Peter J.Rousseeuw and Katrien Van Dreissen
11// "A Fast Algorithm for the Minimum covariance Determinant Estimator"
12// Technometrics, August 1999, Vol.41, NO.3
13//
14//////////////////////////////////////////////////////////////////////////////
15
16#ifndef ROOT_TRobustEstimator
17#define ROOT_TRobustEstimator
18
19#include "TArrayI.h"
20#include "TMatrixDSym.h"
21#include "TVectorD.h"
22
23class TRobustEstimator : public TObject {
24
25protected:
26
27 Int_t fNvar; //number of variables
28 Int_t fH; //algorithm parameter, determining the subsample size
29 Int_t fN; //number of observations
30
31 Int_t fVarTemp; //number of variables already added to the data matrix
32 Int_t fVecTemp; //number of observations already added to the data matrix
33
34 Int_t fExact; //if there was an exact fit, stores the number of points on a hyperplane
35
36 TVectorD fMean; //location estimate (mean values)
37 TMatrixDSym fCovariance; //covariance matrix estimate
38 TMatrixDSym fInvcovariance; //inverse of the covariance matrix
39 TMatrixDSym fCorrelation; //correlation matrix
40 TVectorD fRd; //array of robust distances, size n
41 TVectorD fSd; //array of standard deviations
42 TArrayI fOut; //array of indexes of ouliers, size <0.5*n
43 TVectorD fHyperplane; //in case more than fH observations lie on a hyperplane
44 //the equation of this hyperplane is stored here
45
46 TMatrixD fData; //the original data
47
48 //functions needed for evaluation
49
50 void AddToSscp(TMatrixD &sscp, TVectorD &vec);
52
53 void Classic();
54 void Covar(TMatrixD &sscp, TVectorD &m, TMatrixDSym &cov, TVectorD &sd, Int_t nvec);
55 void Correl();
56
57 void CreateSubset(Int_t ntotal, Int_t htotal, Int_t p, Int_t *index, TMatrixD &data,
58 TMatrixD &sscp, Double_t *ndist);
59 void CreateOrtSubset(TMatrixD &dat, Int_t *index, Int_t hmerged, Int_t nmerged, TMatrixD &sscp, Double_t *ndist);
60
61 Double_t CStep(Int_t ntotal, Int_t htotal, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist);
62
63 Int_t Exact(Double_t *ndist);
64 Int_t Exact2(TMatrixD &mstockbig, TMatrixD &cstockbig, TMatrixD &hyperplane,
65 Double_t *deti, Int_t nbest,Int_t kgroup,
66 TMatrixD &sscp, Double_t *ndist);
67
68 Int_t Partition(Int_t nmini, Int_t *indsubdat);
69 Int_t RDist(TMatrixD &sscp);
70 void RDraw(Int_t *subdat, Int_t ngroup, Int_t *indsubdat);
71
72 Double_t KOrdStat(Int_t ntotal, Double_t *arr, Int_t k, Int_t *work);
73
74public:
75
77 TRobustEstimator(Int_t nvectors, Int_t nvariables, Int_t hh=0);
78 ~TRobustEstimator() override{;}
79
82
83 void Evaluate();
84 void EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh=0);
85
86 Int_t GetBDPoint(); //returns the breakdown point of the algorithm
87
88 /// returns a reference to the data matrix
89 const TMatrixD & GetData() { return fData; }
90
91 void GetCovariance(TMatrixDSym &matr); //returns robust covariance matrix estimate
92 const TMatrixDSym* GetCovariance() const{return &fCovariance;}
93 void GetCorrelation(TMatrixDSym &matr); //returns robust correlation matrix estimate
94 const TMatrixDSym* GetCorrelation() const{return &fCorrelation;}
95 void GetHyperplane(TVectorD &vec); //if the data lies on a hyperplane, returns this hyperplane
96 const TVectorD* GetHyperplane() const; //if the data lies on a hyperplane, returns this hyperplane
97 Int_t GetNHyp() {return fExact;} //returns the number of points on a hyperplane
98 void GetMean(TVectorD &means); //returns robust mean vector estimate
99 const TVectorD* GetMean() const {return &fMean;} //returns robust mean vector estimate
100 void GetRDistances(TVectorD &rdist); //returns robust distances of all observations
101 const TVectorD* GetRDistances() const {return &fRd;} //returns robust distances of all observations
103 Int_t GetNvar() const {return fNvar;}
104 const TArrayI* GetOuliers() const{return &fOut;} //returns an array of outlier indexes
105 Int_t GetNOut(); //returns the number of points outside the tolerance ellipsoid.
106 //ONLY those with robust distances significantly larger than the
107 //cutoff value, should be considered outliers!
108 Double_t GetChiQuant(Int_t i) const;
109
110 ClassDefOverride(TRobustEstimator,1) //Minimum Covariance Determinant Estimator
111
112};
113
114
115#endif
116
int Int_t
Definition: RtypesCore.h:45
double Double_t
Definition: RtypesCore.h:59
#define ClassDefOverride(name, id)
Definition: Rtypes.h:339
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Array of integers (32 bits per element).
Definition: TArrayI.h:27
Mother of all ROOT objects.
Definition: TObject.h:37
Minimum Covariance Determinant Estimator - a Fast Algorithm invented by Peter J.Rousseeuw and Katrien...
Int_t RDist(TMatrixD &sscp)
Calculates robust distances.Then the samples with robust distances greater than a cutoff value (0....
TMatrixDSym fCovariance
Double_t CStep(Int_t ntotal, Int_t htotal, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist)
from the input htotal-subset constructs another htotal subset with lower determinant
const TMatrixDSym * GetCovariance() const
TMatrixDSym fInvcovariance
void CreateSubset(Int_t ntotal, Int_t htotal, Int_t p, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist)
creates a subset of htotal elements from ntotal elements first, p+1 elements are drawn randomly(witho...
void Covar(TMatrixD &sscp, TVectorD &m, TMatrixDSym &cov, TVectorD &sd, Int_t nvec)
calculates mean and covariance
Int_t Exact(Double_t *ndist)
for the exact fit situations returns number of observations on the hyperplane
Double_t KOrdStat(Int_t ntotal, Double_t *arr, Int_t k, Int_t *work)
because I need an Int_t work array
Int_t GetNumberObservations() const
Int_t GetNOut()
returns the number of outliers
TRobustEstimator()
this constructor should be used in a univariate case: first call this constructor,...
~TRobustEstimator() override
void Correl()
transforms covariance matrix into correlation matrix
adds a column to the data matrix it is assumed that the column has size fN variable fVarTemp keeps th...
void RDraw(Int_t *subdat, Int_t ngroup, Int_t *indsubdat)
Draws ngroup nonoverlapping subdatasets out of a dataset of size n such that the selected case number...
void Classic()
called when h=n.
void Evaluate()
Finds the estimate of multivariate mean and variance.
const TVectorD * GetMean() const
Int_t Exact2(TMatrixD &mstockbig, TMatrixD &cstockbig, TMatrixD &hyperplane, Double_t *deti, Int_t nbest, Int_t kgroup, TMatrixD &sscp, Double_t *ndist)
This function is called if determinant of the covariance matrix of a subset=0.
const TArrayI * GetOuliers() const
Int_t GetNvar() const
const TVectorD * GetRDistances() const
const TMatrixD & GetData()
returns a reference to the data matrix
Int_t GetBDPoint()
returns the breakdown point of the algorithm
void CreateOrtSubset(TMatrixD &dat, Int_t *index, Int_t hmerged, Int_t nmerged, TMatrixD &sscp, Double_t *ndist)
creates a subset of hmerged vectors with smallest orthogonal distances to the hyperplane hyp*(x1-m...
adds a vector to the data matrix it is supposed that the vector is of size fNvar
TMatrixDSym fCorrelation
const TVectorD * GetHyperplane() const
if the points are on a hyperplane, returns this hyperplane
Int_t Partition(Int_t nmini, Int_t *indsubdat)
divides the elements into approximately equal subgroups number of elements in each subgroup is stored...
const TMatrixDSym * GetCorrelation() const
Double_t GetChiQuant(Int_t i) const
returns the chi2 quantiles
void EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh=0)
for the univariate case estimates of location and scatter are returned in mean and sigma parameters t...