105 0.454937, 1.38629, 2.36597, 3.35670, 4.35146, 5.34812, 6.34581, 7.34412, 8.34283,
106 9.34182, 10.34, 11.34, 12.34, 13.34, 14.34, 15.34, 16.34, 17.34, 18.34, 19.34,
107 20.34, 21.34, 22.34, 23.34, 24.34, 25.34, 26.34, 27.34, 28.34, 29.34, 30.34,
108 31.34, 32.34, 33.34, 34.34, 35.34, 36.34, 37.34, 38.34, 39.34, 40.34,
109 41.34, 42.34, 43.34, 44.34, 45.34, 46.34, 47.34, 48.34, 49.33};
112 5.02389, 7.3776,9.34840,11.1433,12.8325,
113 14.4494,16.0128,17.5346,19.0228,20.4831,21.920,23.337,
114 24.736,26.119,27.488,28.845,30.191,31.526,32.852,34.170,
115 35.479,36.781,38.076,39.364,40.646,41.923,43.194,44.461,
116 45.722,46.979,48.232,49.481,50.725,51.966,53.203,54.437,
117 55.668,56.896,58.120,59.342,60.561,61.777,62.990,64.201,
118 65.410,66.617,67.821,69.022,70.222,71.420};
132 fCovariance(nvariables),
133 fInvcovariance(nvariables),
134 fCorrelation(nvariables),
138 fHyperplane(nvariables),
139 fData(nvectors, nvariables)
141 if ((nvectors<=1)||(nvariables<=0)){
142 Error(
"TRobustEstimator",
"Not enough vectors or variables");
146 Error(
"TRobustEstimator",
"For the univariate case, use the default constructor and EvaluateUni() function");
154 Warning(
"TRobustEstimator",
"chosen h is too small, default h is taken instead");
213 Warning(
"Evaluate",
"Chosen h = #observations, so classic estimates of location and scatter will be calculated");
230 for (i=0; i<nbest; i++)
244 for (k=0; k<k1; k++) {
248 for (i=0; i<
fH; i++) {
249 for(j=0; j<
fNvar; j++)
280 if(det<deti[maxind]) {
282 for(ii=0; ii<
fNvar; ii++) {
283 mstock(maxind, ii)=
fMean(ii);
284 for(jj=0; jj<
fNvar; jj++)
293 for (i=0; i<nbest; i++) {
294 for(ii=0; ii<
fNvar; ii++) {
295 fMean(ii)=mstock(i, ii);
296 for (jj=0; jj<
fNvar; jj++)
308 for(ii=0; ii<
fNvar; ii++) {
309 mstock(i,ii)=
fMean(ii);
310 for (jj=0; jj<
fNvar; jj++)
316 for(ii=0; ii<
fNvar; ii++) {
317 fMean(ii)=mstock(detind,ii);
319 for(jj=0; jj<
fNvar; jj++)
323 if (deti[detind]!=0) {
331 for (i=0; i<
fN; i++) {
353 for (ii=0; ii<5; ii++)
359 for (ii=0; ii<5; ii++)
364 for (
int iii = 0; iii <
sum; ++iii) subdat[iii] = -999;
365 RDraw(subdat, nsub, indsubdat);
366 for (
int iii = 0; iii <
sum; ++iii) {
367 if (subdat[iii] < 0 || subdat[iii] >=
fN ) {
368 Error(
"Evaluate",
"subdat index is invalid subdat[%d] = %d",iii, subdat[iii] );
374 Int_t nbestsub=nbest*nsub;
378 for (i=0; i<nbestsub; i++) {
379 for(j=0; j<
fNvar; j++)
390 for (
Int_t kgroup=0; kgroup<nsub; kgroup++) {
392 Int_t ntemp=indsubdat[kgroup];
394 for (i=0; i<kgroup; i++)
399 for(i=0; i<ntemp; i++) {
400 for (j=0; j<
fNvar; j++) {
401 dattemp(i,j)=
fData[subdat[temp+i]][j];
406 for (i=0; i<nbest; i++)
409 for(k=0; k<k2; k++) {
412 for (i=0; i<htemp; i++) {
413 for(j=0; j<
fNvar; j++) {
414 vec(j)=dattemp(index[i],j);
421 par =
Exact2(mstockbig, cstockbig, hyperplane, deti, nbest, kgroup, sscp,ndist);
433 det =
CStep(ntemp, htemp, index, dattemp, sscp, ndist);
435 par=
Exact2(mstockbig, cstockbig, hyperplane, deti, nbest, kgroup, sscp, ndist);
447 det=
CStep(ntemp,htemp, index, dattemp, sscp, ndist);
449 par=
Exact2(mstockbig, cstockbig, hyperplane, deti, nbest, kgroup, sscp,ndist);
463 if(det<deti[maxind]) {
465 for(i=0; i<
fNvar; i++) {
466 mstockbig(nbest*kgroup+maxind,i)=
fMean(i);
467 for(j=0; j<
fNvar; j++) {
479 if (deti[maxind]<kEps)
484 for(i=0; i<nbest; i++) {
485 detibig[kgroup*nbest + i]=deti[i];
496 for(i=0; i<
sum; i++) {
497 for (j=0; j<
fNvar; j++)
498 datmerged(i,j)=
fData[subdat[i]][j];
504 for(k=0; k<nbestsub; k++) {
506 for(ii=0; ii<
fNvar; ii++) {
507 fMean(ii)=mstockbig(k,ii);
508 for(jj=0; jj<
fNvar; jj++)
512 for(i=0; i<
fNvar; i++)
517 det=
CStep(
sum, hmerged, index, datmerged, sscp, ndist);
534 det=
CStep(
sum, hmerged, index, datmerged, sscp, ndist);
548 for(i=0; i<
fNvar; i++) {
549 mstockbig(k,i)=
fMean(i);
550 for(j=0; j<
fNvar; j++) {
559 for(i=0; i<
fNvar; i++) {
560 fMean(i)=mstockbig(minind,i);
562 for(j=0; j<
fNvar; j++)
586 for (i=0; i<
fN; i++) {
612 Double_t faclts[]={2.6477,2.5092,2.3826,2.2662,2.1587,2.0589,1.9660,1.879,1.7973,1.7203,1.6473};
625 Int_t len=nvectors-hh;
627 for(
Int_t i=0; i<len; i++)
629 for(
Int_t jint=0; jint<len; jint++) {
631 for (
Int_t j=0; j<hh; j++) {
632 aw[jint]+=data[index[j+jint]];
634 sq+=data[index[j]]*data[index[j]];
636 aw2[jint]=aw[jint]*aw[jint]/hh;
641 slutn[ndup]=aw[jint];
644 sq=sq - data[index[jint-1]]*data[index[jint-1]]+
645 data[index[jint+hh]]*data[index[jint+hh]]-
646 aw2[jint]+aw2[jint-1];
650 slutn[ndup]=aw[jint];
655 slutn[ndup]=aw[jint];
661 slutn[0]=slutn[
Int_t((ndup)/2)]/hh;
686 if (i < 0 || i >= 50)
return 0;
696 Warning(
"GetCovariance",
"provided matrix is of the wrong size, it will be resized");
708 Warning(
"GetCorrelation",
"provided matrix is of the wrong size, it will be resized");
720 Error(
"GetHyperplane",
"the data doesn't lie on a hyperplane!\n");
733 Error(
"GetHyperplane",
"the data doesn't lie on a hyperplane!\n");
737 Warning(
"GetHyperPlane",
"provided vector is of the wrong size, it will be resized");
749 Warning(
"GetMean",
"provided vector is of the wrong size, it will be resized");
761 Warning(
"GetRDistances",
"provided vector is of the wrong size, it will be resized");
781 for (j=1; j<
fNvar+1; j++) {
782 sscp(0, j) +=
vec(j-1);
783 sscp(j, 0) = sscp(0, j);
785 for (i=1; i<
fNvar+1; i++) {
786 for (j=1; j<
fNvar+1; j++) {
787 sscp(i, j) +=
vec(i-1)*
vec(j-1);
830 for (i=0; i<
fNvar; i++) {
832 sd[i]=sscp(i+1, i+1);
833 f=(sd[i]-
m(i)*
m(i)/nvec)/(nvec-1);
838 for (i=0; i<
fNvar; i++) {
839 for (j=0; j<
fNvar; j++) {
840 cov(i, j)=sscp(i+1, j+1)-nvec*
m(i)*
m(j);
853 for(j=0; j<
fNvar; j++)
855 for(i=0; i<
fNvar; i++) {
856 for (j=0; j<
fNvar; j++) {
884 for(i=0; i<ntotal; i++)
887 for (i=0; i<p+1; i++) {
890 for(j=0; j<=i-1; j++) {
908 for (i=0; i<p+1; i++) {
909 for (j=0; j<
fNvar; j++) {
910 vec[j]=data[index[i]][j];
918 while((det<kEps)&&(nindex < htotal)) {
926 for(i=0; i<nindex; i++) {
932 }
while(repeat==
kTRUE);
937 for(j=0; j<
fNvar; j++)
938 vec[j]=data[index[nindex-1]][j];
949 for(j=0; j<ntotal; j++) {
951 for(i=0; i<
fNvar; i++)
952 temp[i]=data[j][i] -
fMean(i);
954 for(i=0; i<
fNvar; i++)
955 ndist[j]+=(data[j][i]-
fMean(i))*temp[i];
957 KOrdStat(ntotal, ndist, htotal-1,index);
971 for (i=0; i<nmerged; i++) {
973 for(j=0; j<
fNvar; j++) {
978 KOrdStat(nmerged, ndist, hmerged-1, index);
980 for (i=0; i<hmerged; i++) {
981 for(j=0; j<
fNvar; j++)
982 vec[j]=dat[index[i]][j];
1009 for(j=0; j<ntotal; j++) {
1011 for(i=0; i<
fNvar; i++)
1012 temp[i]=data[j][i]-
fMean[i];
1014 for(i=0; i<
fNvar; i++)
1015 ndist[j]+=(data[j][i]-
fMean[i])*temp[i];
1019 KOrdStat(ntotal, ndist, htotal-1, index);
1022 for (i=0; i<htotal; i++) {
1023 for (j=0; j<
fNvar; j++)
1024 temp[j]=data[index[i]][j];
1044 for (j=0; j<
fNvar; j++) {
1048 for (i=0; i<
fN; i++) {
1050 for(j=0; j<
fNvar; j++) {
1057 for (i=0; i<
fN; i++) {
1058 if(ndist[i] < 1
e-14) nhyp++;
1084 for (i=0; i<
fN; i++) {
1085 if(ndist[i]<1
e-14) {
1086 for (j=0; j<
fNvar; j++)
1101 for(i=0; i<
fNvar; i++) {
1102 mstockbig(nbest*kgroup+maxind,i)=
fMean(i);
1104 for(j=0; j<
fNvar; j++) {
1121 if ((
fN>=2*nmini) && (
fN<=(3*nmini-1))) {
1126 indsubdat[0]=indsubdat[1]=
Int_t(
fN/2);
1130 if((
fN>=3*nmini) && (
fN<(4*nmini -1))) {
1132 indsubdat[0]=indsubdat[1]=indsubdat[2]=
Int_t(
fN/3);
1137 else indsubdat[2]=
Int_t(
fN/3)+1;
1142 if((
fN>=4*nmini)&&(
fN<=(5*nmini-1))){
1143 if (
fN%4==0) indsubdat[0]=indsubdat[1]=indsubdat[2]=indsubdat[3]=
Int_t(
fN/4);
1147 if(
fN%4==1) indsubdat[2]=indsubdat[3]=
Int_t(
fN/4);
1152 if(
fN%4==3) indsubdat[2]=indsubdat[3]=
Int_t(
fN/4)+1;
1156 for(
Int_t i=0; i<5; i++)
1182 for (i=0; i<
fN; i++) {
1184 for(j=0; j<
fNvar; j++) {
1188 for(j=0; j<
fNvar; j++) {
1202 for (i=0; i<
fN; i++) {
1204 for(j=0; j<
fNvar; j++) {
1209 for(j=0; j<
fNvar; j++) {
1217 for(i=0; i<
fN; i++) {
1218 if (
fRd[i]<=cutoff) {
1219 for(j=0; j<
fNvar; j++)
1220 temp[j]=
fData[i][j];
1240 for (k=1; k<=ngroup; k++) {
1241 for (
m=1;
m<=indsubdat[k-1];
m++) {
1248 subdat[jndex-1]=nrand+jndex-2;
1249 for (i=1; i<=jndex-1; i++) {
1250 if(subdat[i-1] > nrand+i-2) {
1251 for(j=jndex; j>=i+1; j--) {
1252 subdat[j-1]=subdat[j-2];
1255 subdat[i-1]=nrand+i-2;
1269 const Int_t kWorkMax=100;
1273 Int_t workLocal[kWorkMax];
1281 if (ntotal > kWorkMax) {
1282 isAllocated =
kTRUE;
1283 ind =
new Int_t[ntotal];
1287 for (
Int_t ii=0; ii<ntotal; ii++) {
1295 if (ir ==
l+1 &&
a[ind[ir]]<
a[ind[
l]])
1296 {temp = ind[
l]; ind[
l]=ind[ir]; ind[ir]=temp;}
1303 {temp = ind[mid]; ind[mid]=ind[
l+1]; ind[
l+1]=temp;}
1304 if (
a[ind[
l]]>
a[ind[ir]])
1305 {temp = ind[
l]; ind[
l]=ind[ir]; ind[ir]=temp;}
1307 if (
a[ind[
l+1]]>
a[ind[ir]])
1308 {temp=ind[
l+1]; ind[
l+1]=ind[ir]; ind[ir]=temp;}
1310 if (
a[ind[
l]]>
a[ind[
l+1]])
1311 {temp = ind[
l]; ind[
l]=ind[
l+1]; ind[
l+1]=temp;}
1317 do i++;
while (
a[ind[i]]<
a[arr]);
1318 do j--;
while (
a[ind[j]]>
a[arr]);
1320 {temp=ind[i]; ind[i]=ind[j]; ind[j]=temp;}
1324 if (j>=rk) ir = j-1;
R__EXTERN TRandom * gRandom
const Double_t kChiMedian[50]
const Double_t kChiQuant[50]
void Set(Int_t n)
Set size of this array to n ints.
Cholesky Decomposition class.
Bool_t Invert(TMatrixDSym &inv)
For a symmetric matrix A(m,m), its inverse A_inv(m,m) is returned .
const TVectorD & GetEigenValues() const
const TMatrixD & GetEigenVectors() const
virtual Double_t Determinant() const
virtual TMatrixTBase< Element > & ResizeTo(Int_t nrows, Int_t ncols, Int_t=-1)
Set size of the matrix to nrows x ncols New dynamic elements are created, the overlapping part of the...
virtual TMatrixTBase< Element > & ResizeTo(Int_t nrows, Int_t ncols, Int_t=-1)
Set size of the matrix to nrows x ncols New dynamic elements are created, the overlapping part of the...
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
virtual Double_t Uniform(Double_t x1=1)
Returns a uniform deviate on the interval (0, x1).
Minimum Covariance Determinant Estimator - a Fast Algorithm invented by Peter J.Rousseeuw and Katrien...
Int_t RDist(TMatrixD &sscp)
Calculates robust distances.Then the samples with robust distances greater than a cutoff value (0....
Double_t CStep(Int_t ntotal, Int_t htotal, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist)
from the input htotal-subset constructs another htotal subset with lower determinant
const TMatrixDSym * GetCovariance() const
TMatrixDSym fInvcovariance
void CreateSubset(Int_t ntotal, Int_t htotal, Int_t p, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist)
creates a subset of htotal elements from ntotal elements first, p+1 elements are drawn randomly(witho...
void Covar(TMatrixD &sscp, TVectorD &m, TMatrixDSym &cov, TVectorD &sd, Int_t nvec)
calculates mean and covariance
Int_t Exact(Double_t *ndist)
for the exact fit situations returns number of observations on the hyperplane
Double_t KOrdStat(Int_t ntotal, Double_t *arr, Int_t k, Int_t *work)
because I need an Int_t work array
Int_t GetNOut()
returns the number of outliers
TRobustEstimator()
this constructor should be used in a univariate case: first call this constructor,...
void Correl()
transforms covariance matrix into correlation matrix
void AddColumn(Double_t *col)
adds a column to the data matrix it is assumed that the column has size fN variable fVarTemp keeps th...
void RDraw(Int_t *subdat, Int_t ngroup, Int_t *indsubdat)
Draws ngroup nonoverlapping subdatasets out of a dataset of size n such that the selected case number...
void Classic()
called when h=n.
void Evaluate()
Finds the estimate of multivariate mean and variance.
const TVectorD * GetMean() const
Int_t Exact2(TMatrixD &mstockbig, TMatrixD &cstockbig, TMatrixD &hyperplane, Double_t *deti, Int_t nbest, Int_t kgroup, TMatrixD &sscp, Double_t *ndist)
This function is called if determinant of the covariance matrix of a subset=0.
const TVectorD * GetRDistances() const
Int_t GetBDPoint()
returns the breakdown point of the algorithm
void CreateOrtSubset(TMatrixD &dat, Int_t *index, Int_t hmerged, Int_t nmerged, TMatrixD &sscp, Double_t *ndist)
creates a subset of hmerged vectors with smallest orthogonal distances to the hyperplane hyp[1]*(x1-m...
void AddRow(Double_t *row)
adds a vector to the data matrix it is supposed that the vector is of size fNvar
const TVectorD * GetHyperplane() const
if the points are on a hyperplane, returns this hyperplane
Int_t Partition(Int_t nmini, Int_t *indsubdat)
divides the elements into approximately equal subgroups number of elements in each subgroup is stored...
const TMatrixDSym * GetCorrelation() const
Double_t GetChiQuant(Int_t i) const
returns the chi2 quantiles
void EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh=0)
for the univariate case estimates of location and scatter are returned in mean and sigma parameters t...
void AddToSscp(TMatrixD &sscp, TVectorD &vec)
update the sscp matrix with vector vec
void ClearSscp(TMatrixD &sscp)
clear the sscp matrix, used for covariance and mean calculation
TVectorT< Element > & ResizeTo(Int_t lwb, Int_t upb)
Resize the vector to [lwb:upb] .
Int_t GetNoElements() const
Element * GetMatrixArray()
Long64_t LocMin(Long64_t n, const T *a)
Return index of array with the minimum element.
Long64_t LocMax(Long64_t n, const T *a)
Return index of array with the maximum element.
Double_t Sqrt(Double_t x)
Short_t Min(Short_t a, Short_t b)
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
Double_t Median(Long64_t n, const T *a, const Double_t *w=0, Long64_t *work=0)
Return the median of the array a where each entry i has weight w[i] .
static uint64_t sum(uint64_t i)