Logo ROOT  
Reference Guide
TGraphQQ.cxx
Go to the documentation of this file.
1// @(#)root/graf:$Id$
2// Author: Anna Kreshuk 18/11/2005
3
4/*************************************************************************
5 * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers. *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12#include "TGraphQQ.h"
13#include "TAxis.h"
14#include "TF1.h"
15#include "TMath.h"
16
18
19/** \class TGraphQQ
20\ingroup BasicGraphics
21
22This class draws quantile-quantile plots.
23
24Plots can be drawn for 2 datasets or for a dataset and a theoretical
25distribution function
26
27## 2 datasets:
28 Quantile-quantile plots are used to determine whether 2 samples come from
29 the same distribution.
30 A qq-plot draws the quantiles of one dataset against the quantiles of
31 the other. The quantiles of the dataset with fewer entries are on the Y axis,
32 those of the dataset with more entries are on the X axis.
33 A straight line, going through 0.25 and 0.75 quantiles is also plotted
34 for reference. It represents a robust linear fit, not sensitive to the
35 extremes of the datasets.
36 If the datasets come from the same distribution, points of the plot should
37 fall approximately on the 45 degrees line. If they have the same
38 distribution function, but different location or scale parameters,
39 they should still fall on a straight line, but not the 45 degrees one.
40 The greater their departure from the straight line, the more evidence there
41 is that the datasets come from different distributions.
42 The advantage of qq-plot is that it not only shows that the underlying
43 distributions are different, but, unlike the analytical methods, it also
44 gives information on the nature of this difference: heavier tails,
45 different location/scale, different shape, etc.
46
47 Some examples of qqplots of 2 datasets:
48
49\image html graf_graphqq1.png
50
51## 1 dataset:
52 Quantile-quantile plots are used to determine if the dataset comes from the
53 specified theoretical distribution, such as normal.
54 A qq-plot draws quantiles of the dataset against quantiles of the specified
55 theoretical distribution.
56 (Note that the density, not the CDF, should be specified.)
57 A straight line, going through 0.25 and 0.75 quantiles can also be plotted
58 for reference. It represents a robust linear fit, not sensitive to the
59 extremes of the dataset.
60 As in the 2 datasets case, departures from straight line indicate departures
61 from the specified distribution.
62
63 "The correlation coefficient associated with the linear fit to the data
64 in the probability plot (qq plot in our case) is a measure of the
65 goodness of the fit.
66 Estimates of the location and scale parameters of the distribution
67 are given by the intercept and slope. Probability plots can be generated
68 for several competing distributions to see which provides the best fit,
69 and the probability plot generating the highest correlation coefficient
70 is the best choice since it generates the straightest probability plot."
71
72 From the NIST "Engineering Statistics Handbook",
73
74 http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
75
76 Example of a qq-plot of a dataset from N(3, 2) distribution and
77 TMath::Gaus(0, 1) theoretical function. Fitting parameters
78 are estimates of the distribution mean and sigma.
79
80\image html graf_graphqq2.png
81
82References:
83
84http://www.itl.nist.gov/div898/handbook/eda/section3/qqplot.htm
85
86http://www.itl.nist.gov/div898/handbook/eda/section3/probplot.htm
87*/
88
89////////////////////////////////////////////////////////////////////////////////
90/// default constructor
91
93{
// No theoretical density function and no second dataset are attached yet.
94 fF = 0;
95 fY0 = 0;
96 fNy0 = 0;
// Both end points of the interquartile reference line start out at zero.
97 fXq1 = 0.;
98 fXq2 = 0.;
99 fYq1 = 0.;
100 fYq2 = 0.;
101
102}
103
104////////////////////////////////////////////////////////////////////////////////
105/// Creates a quantile-quantile plot of dataset x.
106/// Theoretical distribution function can be defined later by SetFunction method
107
109 : TGraph(n)
110{
// No second dataset is stored; the interquartile line end points start at zero.
111 fNy0 = 0;
112 fXq1 = 0.;
113 fXq2 = 0.;
114 fYq1 = 0.;
115 fYq2 = 0.;
116
// Scratch array giving the order in which the entries of x are copied into fY.
// NOTE(review): the listing numbering jumps from 117 to 119, so the statement that
// fills `index` is not visible in this view -- presumably a TMath::Sort call like
// the one in the two-dataset constructor; confirm against the real file.
117 Int_t *index = new Int_t[n];
119 for (Int_t i=0; i<fNpoints; i++)
120 fY[i] = x[index[i]];
// No theoretical function and no second dataset yet.
121 fF = nullptr;
122 fY0 = nullptr;
123 delete [] index;
124}
125
126////////////////////////////////////////////////////////////////////////////////
127/// Creates a quantile-quantile plot of dataset x against function f
128
130 : TGraph(n)
131{
// No second dataset is stored in this mode.
132 fNy0 = 0;
133
// Scratch array giving the order in which the entries of x are copied into fY.
// NOTE(review): the listing numbering jumps from 134 to 136, so the statement that
// fills `index` is not visible in this view -- presumably a TMath::Sort call;
// confirm against the real file.
134 Int_t *index = new Int_t[n];
136 for (Int_t i=0; i<fNpoints; i++)
137 fY[i] = x[index[i]];
138 delete [] index;
// Only the pointer is stored; the destructor resets fF without deleting it.
139 fF = f;
140 fY0 = nullptr;
// NOTE(review): listing line 141 is missing from this view, and fXq1/fXq2/fYq1/fYq2
// are not assigned in the visible lines -- presumably the missing line computes the
// function quantiles (which ends in Quartiles()); confirm against the real file.
142}
143
144////////////////////////////////////////////////////////////////////////////////
145/// Creates a quantile-quantile plot of dataset x against dataset y
146/// Parameters nx and ny are respective array sizes
147
149{
// Start with no theoretical function, no stored second dataset and a zeroed
// interquartile line.
150 fNy0 = 0;
151 fXq1 = 0.;
152 fXq2 = 0.;
153 fYq1 = 0.;
154 fYq2 = 0.;
155 fF = nullptr;
156 fY0 = nullptr;
157
// The graph has as many points as the smaller of the two datasets.
158 fNpoints = (nx <= ny) ? nx : ny;
159
// Stop here if CtorAllocate() reports failure.
160 if (!CtorAllocate()) return;
161
// One index buffer, sized for the larger dataset, is reused for both sorts.
// Sort is called with down = kFALSE, i.e. not in decreasing order.
162 Int_t *index = new Int_t[TMath::Max(nx, ny)];
163 TMath::Sort(nx, x, index, kFALSE);
164 if (nx <=ny){
// x is the smaller (or equal-sized) dataset: its sorted values go on the Y axis.
165 for (Int_t i=0; i<fNpoints; i++)
166 fY[i] = x[index[i]];
167 TMath::Sort(ny, y, index, kFALSE);
168 if (nx==ny){
// Equal sizes: sorted y is used directly as the X coordinates.
169 for (Int_t i=0; i<fNpoints; i++)
170 fX[i] = y[index[i]];
171 fY0 = nullptr;
172 Quartiles();
173 } else {
// y is larger: keep a sorted copy in fY0.
// Fix: the copy now goes through `index`. The original stored y in its input
// order, although MakeQuantiles() interpolates between neighbouring fY0 entries
// and Quartiles() passes fY0 to TMath::Quantiles with isSorted = kTRUE, and the
// nx > ny branch below already stores its copy sorted.
174 fNy0 = ny;
175 fY0 = new Double_t[ny];
176 for (Int_t i=0; i<ny; i++)
177 fY0[i] = y[index[i]];
// NOTE(review): listing line 178 is missing from this view -- presumably the call
// that interpolates fX from fY0; confirm against the real file.
179 }
180 } else {
// x is larger: keep its sorted copy in fY0 and put sorted y on the Y axis.
181 fNy0 = nx;
182 fY0 = new Double_t[nx];
183 for (Int_t i=0; i<nx; i++)
184 fY0[i] = x[index[i]];
185 TMath::Sort(ny, y, index, kFALSE);
186 for (Int_t i=0; i<ny; i++)
187 fY[i] = y[index[i]];
// NOTE(review): listing line 188 is missing from this view -- presumably the call
// that interpolates fX from fY0; confirm against the real file.
189 }
190
191 delete [] index;
192}
193
194////////////////////////////////////////////////////////////////////////////////
195/// Destroys a TGraphQQ
196
198{
// Free the fY0 array (allocated with new[] in the two-dataset constructor), if any.
199 if (fY0)
200 delete [] fY0;
// The function pointer is only reset, never deleted here.
201 if (fF)
202 fF = nullptr;
203}
204
205////////////////////////////////////////////////////////////////////////////////
206/// Computes quantiles of theoretical distribution function
207
209{
// Fills fX with quantiles of the function fF, then refreshes the quartile line.
// Does nothing when no function is set.
210 if (!fF) return;
// The function's title decides whether it is handled as a Gaussian.
211 TString s = fF->GetTitle();
212 Double_t pk;
213 if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
214 //use plotting positions optimal for normal distribution
// fX gets standard normal N(0, 1) quantiles; fF itself is not evaluated here.
215 for (Int_t k=1; k<=fNpoints; k++){
216 pk = (k-0.375)/(fNpoints+0.25);
217 fX[k-1]=TMath::NormQuantile(pk);
218 }
219 } else {
// Generic function: build the probabilities first. More than 10 points use
// (k-0.5)/n, otherwise the same (k-0.375)/(n+0.25) positions as above.
220 Double_t *prob = new Double_t[fNpoints];
221 if (fNpoints > 10){
222 for (Int_t k=1; k<=fNpoints; k++)
223 prob[k-1] = (k-0.5)/fNpoints;
224 } else {
225 for (Int_t k=1; k<=fNpoints; k++)
226 prob[k-1] = (k-0.375)/(fNpoints+0.25);
227 }
228 //fF->GetQuantiles(fNpoints, prob, fX);
// TF1::GetQuantiles(nprobSum, q, probSum): fX is the output, prob the input.
229 fF->GetQuantiles(fNpoints, fX, prob);
230 delete [] prob;
231 }
232
// Recompute the interquartile reference line.
233 Quartiles();
234}
235
236////////////////////////////////////////////////////////////////////////////////
237/// When sample sizes are not equal, computes quantiles of the bigger sample
238/// by linear interpolation
239
241{
// Fills fX with fNpoints values interpolated linearly from the fNy0 entries of fY0,
// then refreshes the quartile line.
242
243
// Nothing to interpolate from when no second dataset is stored.
244 if (!fY0) return;
245
// pi    : fractional position in fY0 for point i, mapping [0, fNpoints-1] linearly
//         onto [0, fNy0-1]
// pint  : integer part of pi
// pfrac : fractional part of pi, used as the interpolation weight
246 Double_t pi, pfrac;
247 Int_t pint;
248 for (Int_t i=0; i<fNpoints-1; i++){
249 pi = (fNy0-1)*Double_t(i)/Double_t(fNpoints-1);
250 pint = TMath::FloorNint(pi);
251 pfrac = pi - pint;
252 fX[i] = (1-pfrac)*fY0[pint]+pfrac*fY0[pint+1];
253 }
// The last point is set directly: for i = fNpoints-1 the formula above would read
// fY0[pint+1] with pint+1 == fNy0.
// NOTE(review): this line indexes fX[fNpoints-1], so it relies on fNpoints >= 1 --
// presumably guaranteed by the callers; confirm.
254 fX[fNpoints-1]=fY0[fNy0-1];
255
// Recompute the interquartile reference line.
256 Quartiles();
257}
258
259////////////////////////////////////////////////////////////////////////////////
260/// compute quartiles
261/// a quartile is a 25 per cent or 75 per cent quantile
262
264{
// Computes the 0.25 and 0.75 quantiles on both axes and stores them as the end
// points (fXq1, fYq1) and (fXq2, fYq2) of the interquartile line.
265 Double_t prob[]={0.25, 0.75};
266 Double_t x[2];
267 Double_t y[2];
// Y side: always taken from fY, passed with isSorted = kTRUE.
268 TMath::Quantiles(fNpoints, 2, fY, y, prob, kTRUE);
// X side, in order of preference:
//  1. the stored second dataset fY0 (also passed with isSorted = kTRUE),
//  2. the function fF: standard normal quantiles if its title contains
//     "TMath::Gaus" or "gaus", otherwise fF->GetQuantiles,
//  3. the current X values in fX.
269 if (fY0)
270 TMath::Quantiles(fNy0, 2, fY0, x, prob, kTRUE);
271 else if (fF) {
272 TString s = fF->GetTitle();
273 if (s.Contains("TMath::Gaus") || s.Contains("gaus")){
274 x[0] = TMath::NormQuantile(0.25);
275 x[1] = TMath::NormQuantile(0.75);
276 } else
277 fF->GetQuantiles(2, x, prob);
278 }
279 else
280 TMath::Quantiles(fNpoints, 2, fX, x, prob, kTRUE);
281
282 fXq1=x[0]; fXq2=x[1]; fYq1=y[0]; fYq2=y[1];
283}
284
285////////////////////////////////////////////////////////////////////////////////
286///Sets the theoretical distribution function (density!)
287///and computes its quantiles
288
290{
// Only the pointer is stored; the destructor resets fF without deleting it.
291 fF = f;
// NOTE(review): listing line 292 is missing from this view -- presumably the call
// that computes the function quantiles; confirm against the real file.
293}
#define f(i)
Definition: RSha256.hxx:104
const Bool_t kFALSE
Definition: RtypesCore.h:101
double Double_t
Definition: RtypesCore.h:59
const Bool_t kTRUE
Definition: RtypesCore.h:100
#define ClassImp(name)
Definition: Rtypes.h:375
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
1-Dim function class
Definition: TF1.h:213
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum)
Compute Quantiles for density distribution of this function.
Definition: TF1.cxx:2008
This class allows to draw quantile-quantile plots.
Definition: TGraphQQ.h:18
void Quartiles()
compute quartiles a quartile is a 25 per cent or 75 per cent quantile
Definition: TGraphQQ.cxx:263
TGraphQQ()
default constructor
Definition: TGraphQQ.cxx:92
virtual ~TGraphQQ()
Destroys a TGraphQQ.
Definition: TGraphQQ.cxx:197
TF1 * fF
theoretical density function, if specified
Definition: TGraphQQ.h:26
Double_t fYq1
y1 coordinate of the interquartile line
Definition: TGraphQQ.h:23
Double_t * fY0
! second dataset, if specified
Definition: TGraphQQ.h:25
void SetFunction(TF1 *f)
Sets the theoretical distribution function (density!) and computes its quantiles.
Definition: TGraphQQ.cxx:289
Double_t fXq2
x2 coordinate of the interquartile line
Definition: TGraphQQ.h:22
Int_t fNy0
size of the fY0 dataset
Definition: TGraphQQ.h:20
void MakeFunctionQuantiles()
Computes quantiles of theoretical distribution function.
Definition: TGraphQQ.cxx:208
Double_t fXq1
x1 coordinate of the interquartile line
Definition: TGraphQQ.h:21
Double_t fYq2
y2 coordinate of the interquartile line
Definition: TGraphQQ.h:24
void MakeQuantiles()
When sample sizes are not equal, computes quantiles of the bigger sample by linear interpolation.
Definition: TGraphQQ.cxx:240
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Int_t fNpoints
Number of points <= fMaxSize.
Definition: TGraph.h:46
Double_t * fY
[fNpoints] array of Y points
Definition: TGraph.h:48
Bool_t CtorAllocate()
In constructors set fNpoints than call this method.
Definition: TGraph.cxx:780
Double_t * fX
[fNpoints] array of X points
Definition: TGraph.h:47
const char * GetTitle() const override
Returns title of object.
Definition: TNamed.h:48
Basic string class.
Definition: TString.h:136
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
static constexpr double s
static constexpr double pi
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition: TMathBase.h:250
Int_t FloorNint(Double_t x)
Returns the nearest integer of TMath::Floor(x).
Definition: TMath.h:684
Double_t NormQuantile(Double_t p)
Computes quantiles for standard normal distribution N(0, 1) at probability p.
Definition: TMath.cxx:2451
void Quantiles(Int_t n, Int_t nprob, Double_t *x, Double_t *quantiles, Double_t *prob, Bool_t isSorted=kTRUE, Int_t *index=nullptr, Int_t type=7)
Computes sample quantiles, corresponding to the given probabilities.
Definition: TMath.cxx:1207
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
Sort the n elements of the array a of generic templated type Element.
Definition: TMathBase.h:431