Logo ROOT   6.12/07
Reference Guide
principal.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_math
3 /// \notebook
4 /// Principal Components Analysis (PCA) example
5 ///
6 /// Example of using TPrincipal as a stand alone class.
7 ///
8 /// We create n-dimensional data points, of which c = trunc(n / 5) + 1 variables
9 /// are correlated with the remaining n - c randomly distributed variables.
10 ///
11 /// \macro_output
12 /// \macro_code
13 ///
14 /// \authors Rene Brun, Christian Holm Christensen
15 
16 #include "TPrincipal.h"
17 
18 void principal(Int_t n=10, Int_t m=10000)
19 {
20  Int_t c = int(n / 5) + 1;
21 
22  cout << "*************************************************" << endl;
23  cout << "* Principal Component Analysis *" << endl;
24  cout << "* *" << endl;
25  cout << "* Number of variables: " << setw(4) << n
26  << " *" << endl;
27  cout << "* Number of data points: " << setw(8) << m
28  << " *" << endl;
29  cout << "* Number of dependent variables: " << setw(4) << c
30  << " *" << endl;
31  cout << "* *" << endl;
32  cout << "*************************************************" << endl;
33 
34 
35  // Initilase the TPrincipal object. Use the empty string for the
36  // final argument, if you don't wan't the covariance
37  // matrix. Normalising the covariance matrix is a good idea if your
38  // variables have different orders of magnitude.
39  TPrincipal* principal = new TPrincipal(n,"ND");
40 
41  // Use a pseudo-random number generator
42  TRandom* randumNum = new TRandom;
43 
44  // Make the m data-points
45  // Make a variable to hold our data
46  // Allocate memory for the data point
47  Double_t* data = new Double_t[n];
48  for (Int_t i = 0; i < m; i++) {
49 
50  // First we create the un-correlated, random variables, according
51  // to one of three distributions
52  for (Int_t j = 0; j < n - c; j++) {
53  if (j % 3 == 0) data[j] = randumNum->Gaus(5,1);
54  else if (j % 3 == 1) data[j] = randumNum->Poisson(8);
55  else data[j] = randumNum->Exp(2);
56  }
57 
58  // Then we create the correlated variables
59  for (Int_t j = 0 ; j < c; j++) {
60  data[n - c + j] = 0;
61  for (Int_t k = 0; k < n - c - j; k++) data[n - c + j] += data[k];
62  }
63 
64  // Finally we're ready to add this datapoint to the PCA
65  principal->AddRow(data);
66  }
67 
68  // We delete the data after use, since TPrincipal got it by now.
69  delete [] data;
70 
71  // Do the actual analysis
72  principal->MakePrincipals();
73 
74  // Print out the result on
75  principal->Print();
76 
77  // Test the PCA
78  principal->Test();
79 
80  // Make some histograms of the orginal, principal, residue, etc data
81  principal->MakeHistograms();
82 
83  // Make two functions to map between feature and pattern space
84  principal->MakeCode();
85 
86  // Start a browser, so that we may browse the histograms generated
87  // above
88  TBrowser* b = new TBrowser("principalBrowser", principal);
89 }
Principal Components Analysis (PCA)
Definition: TPrincipal.h:20
auto * m
Definition: textangle.C:8
virtual void Print(Option_t *opt="MSE") const
Print the statistics Options are.
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigma.
Definition: TRandom.cxx:256
int Int_t
Definition: RtypesCore.h:41
void Test(Option_t *option="")
Test the PCA by calculating the sum of squared residuals (see method SumOfSquareResiduals), and display the histogram.
This is the base class for the ROOT Random number generators.
Definition: TRandom.h:27
virtual void MakeHistograms(const char *name="pca", Option_t *option="epsdx")
Make histograms of the result of the analysis.
Definition: TPrincipal.cxx:569
Using a TBrowser one can browse all ROOT objects.
Definition: TBrowser.h:37
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
Definition: TPrincipal.cxx:410
virtual void MakePrincipals()
Perform the principal components analysis.
Definition: TPrincipal.cxx:869
double Double_t
Definition: RtypesCore.h:55
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
virtual void MakeCode(const char *filename="pca", Option_t *option="")
Generates the file <filename>, with .C appended if the argument doesn't end in ...
Definition: TPrincipal.cxx:544
virtual Int_t Poisson(Double_t mean)
Generates a random integer N according to a Poisson law.
Definition: TRandom.cxx:383
const Int_t n
Definition: legend1.C:16
virtual Double_t Exp(Double_t tau)
Returns an exponential deviate.
Definition: TRandom.cxx:233