Logo ROOT   6.12/07
Reference Guide
sparsehist.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_hist
3 /// Evaluate the performance of THnSparse vs TH1/2/3/nF
4 /// for different numbers of dimensions and bins per dimension.
5 ///
6 /// The script calculates the bandwidth for filling and retrieving
7 /// bin contents (in million entries per second) for these two
8 /// histogramming techniques, where "seconds" is CPU and real time.
9 ///
10 /// The first line of the plots contains the bandwidth based on the
11 /// CPU time (THnSpase, TH1/2/3/nF*, ratio), the second line shows
12 /// the plots for real time, and the third line shows the fraction of
13 /// filled bins and memory used by THnSparse vs. TH1/2/3/nF.
14 ///
15 /// The timing depends on the distribution and the amount of entries
16 /// in the histograms; here, a Gaussian distribution (center is
17 /// contained in the histograms) is used to fill each histogram with
18 /// 1000 entries. The filling and reading is repeated until enough
19 /// statistics have been collected.
20 ///
21 /// tutorials/tree/drawsparse.C shows an example for visualizing a
22 /// THnSparse. It creates a TTree which is then drawn using
23 /// TParallelCoord.
24 ///
25 /// This macro should be run in compiled mode due to the many nested
26 /// loops that force CINT to disable its optimization. If run
27 /// interpreted one would not benchmark THnSparse but CINT.
28 ///
29 /// Run as:
30 /// ~~~{.cpp}
31 /// root[0] .L $ROOTSYS/tutorials/hist/sparsehist.C+
32 /// root[1] sparsehist()
33 /// ~~~
34 ///
35 /// \macro_code
36 ///
37 /// \author Axel.Naumann
38 
39 #include "TH1.h"
40 #include "TH2.h"
41 #include "TH3.h"
42 #include "THn.h"
43 #include "THnSparse.h"
44 #include "TStopwatch.h"
45 #include "TRandom.h"
46 #include "TCanvas.h"
47 #include "TFile.h"
48 #include "TStyle.h"
49 #include "TSystem.h"
50 
51 #ifndef INT_MAX
52 #define INT_MAX std::numeric_limits<int>::max()
53 #endif
54 
55 class TTimeHists {
56 public:
57  enum EHist { kHist, kSparse, kNumHist };
58  enum ETime { kReal, kCPU, kNumTime };
59  TTimeHists(Int_t dim, Int_t bins, Long_t num):
60  fValue(0), fDim(dim), fBins(bins), fNum(num),
61  fSparse(0), fHist(0), fHn(0) {}
62  ~TTimeHists();
63  bool Run();
64  Double_t GetTime(EHist hist, ETime time) const {
65  if (time == kReal) return fTime[hist][0];
66  return fTime[hist][1]; }
67  static void SetDebug(Int_t lvl) { fgDebug = lvl; }
68  THnSparse* GetSparse() const { return fSparse; }
69 
70 protected:
71  void Fill(EHist hist);
72  Double_t Check(EHist hist);
73  void SetupHist(EHist hist);
74  void NextValues();
75  void SetupValues();
76 
77 private:
79  Int_t fDim;
80  Int_t fBins;
81  Long_t fNum;
82  Double_t fTime[2][2];
83  THnSparse* fSparse;
84  TH1* fHist;
85  THn* fHn;
86  static Int_t fgDebug;
87 };
88 
89 Int_t TTimeHists::fgDebug = 0;
90 
91 TTimeHists::~TTimeHists()
92 {
93  delete [] fValue;
94  delete fSparse;
95  delete fHist;
96  delete fHn;
97 }
98 
99 bool TTimeHists::Run()
100 {
101  // run all tests with current settings, and check for identity of content.
102 
103  Double_t check[2];
104  Long64_t rep[2];
105  for (int h = 0; h < 2; ++h) {
106  rep[h] = 0;
107  SetupValues();
108  try {
109  TStopwatch w;
110  w.Start();
111  SetupHist((EHist) h);
112  w.Stop();
113  do {
114  w.Start(kFALSE);
115  Fill((EHist) h);
116  check[h] = Check((EHist) h);
117  w.Stop();
118  ++rep[h];
119  } while ((!h && w.RealTime() < 0.1)
120  || (h && rep[0] > 0 && rep[1] < rep[0]));
121 
122  fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
123  fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
124 
125  if (h == 1 && (fTime[h][0] > 1E20 || fTime[h][1] > 1E20)) {
126  do {
127  // some more cycles:
128  w.Start(kFALSE);
129  Fill((EHist) h);
130  Check((EHist) h);
131  w.Stop();
132  ++rep[h];
133  } while (w.RealTime() < 0.1);
134 
135  fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
136  fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
137  }
138 
139  if (fTime[h][0] > 1E20) fTime[h][0] = 1E20;
140  if (fTime[h][1] > 1E20) fTime[h][1] = 1E20;
141  }
142  catch (std::exception&) {
143  fTime[h][0] = fTime[h][1] = -1.;
144  check[h] = -1.; // can never be < 1 without exception
145  rep[h] = -1;
146  }
147  }
148  if (check[0] != check[1])
149  if (check[0] != -1.)
150  printf("ERROR: mismatch of histogram (%g) and sparse histogram (%g) for dim=%d, bins=%d!\n",
151  check[0], check[1], fDim, fBins);
152  // else
153  // printf("ERROR: cannot allocate histogram for dim=%d, bins=%d - out of memory!\n",
154  // fDim, fBins);
155  return (check[0] == check[1]);
156 }
157 
158 void TTimeHists::NextValues()
159 {
160  for (Int_t d = 0; d < fDim; ++d)
161  fValue[d] = gRandom->Gaus() / 4.;
162 }
163 
164 void TTimeHists::SetupValues()
165 {
166  // define fValue
167  if (!fValue) fValue = new Double_t[fDim];
168  gRandom->SetSeed(42);
169 }
170 
171 void TTimeHists::Fill(EHist hist)
172 {
173  for (Long_t n = 0; n < fNum; ++n) {
174  NextValues();
175  if (fgDebug > 1) {
176  printf("%ld: fill %s", n, hist == kHist? (fDim < 4 ? "hist" : "arr") : "sparse");
177  for (Int_t d = 0; d < fDim; ++d)
178  printf("[%g]", fValue[d]);
179  printf("\n");
180  }
181  if (hist == kHist) {
182  switch (fDim) {
183  case 1: fHist->Fill(fValue[0]); break;
184  case 2: ((TH2F*)fHist)->Fill(fValue[0], fValue[1]); break;
185  case 3: ((TH3F*)fHist)->Fill(fValue[0], fValue[1], fValue[2]); break;
186  default: fHn->Fill(fValue); break;
187  }
188  } else {
189  fSparse->Fill(fValue);
190  }
191  }
192 }
193 
194 void TTimeHists::SetupHist(EHist hist)
195 {
196  if (hist == kHist) {
197  switch (fDim) {
198  case 1: fHist = new TH1F("h1", "h1", fBins, -1., 1.); break;
199  case 2: fHist = new TH2F("h2", "h2", fBins, -1., 1., fBins, -1., 1.); break;
200  case 3: fHist = new TH3F("h3", "h3", fBins, -1., 1., fBins, -1., 1., fBins, -1., 1.); break;
201  default:
202  {
203  MemInfo_t meminfo;
204  gSystem->GetMemInfo(&meminfo);
205  Int_t size = 1;
206  for (Int_t d = 0; d < fDim; ++d) {
207  if ((Int_t)(size * sizeof(Float_t)) > INT_MAX / (fBins + 2)
208  || (meminfo.fMemFree > 0
209  && meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000)))
210  throw std::bad_alloc();
211  size *= (fBins + 2);
212  }
213  if (meminfo.fMemFree > 0
214  && meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000))
215  throw std::bad_alloc();
216  Int_t* bins = new Int_t[fDim];
217  Double_t *xmin = new Double_t[fDim];
218  Double_t *xmax = new Double_t[fDim];
219  for (Int_t d = 0; d < fDim; ++d) {
220  bins[d] = fBins;
221  xmin[d] = -1.;
222  xmax[d] = 1.;
223  }
224  fHn = new THnF("hn", "hn", fDim, bins, xmin, xmax);
225  }
226  }
227  } else {
228  Int_t* bins = new Int_t[fDim];
229  Double_t *xmin = new Double_t[fDim];
230  Double_t *xmax = new Double_t[fDim];
231  for (Int_t d = 0; d < fDim; ++d) {
232  bins[d] = fBins;
233  xmin[d] = -1.;
234  xmax[d] = 1.;
235  }
236  fSparse = new THnSparseF("hs", "hs", fDim, bins, xmin, xmax);
237  }
238 }
239 
240 Double_t TTimeHists::Check(EHist hist)
241 {
242  // Check bin content of all bins
243  Double_t check = 0.;
244  Int_t* x = new Int_t[fDim];
245  memset(x, 0, sizeof(Int_t) * fDim);
246 
247  if (hist == kHist) {
248  Long_t idx = 0;
249  Long_t size = 1;
250  for (Int_t d = 0; d < fDim; ++d)
251  size *= (fBins + 2);
252  while (x[0] <= fBins + 1) {
253  Double_t v = -1.;
254  if (fDim < 4) {
255  Long_t histidx = x[0];
256  if (fDim == 2) histidx = fHist->GetBin(x[0], x[1]);
257  else if (fDim == 3) histidx = fHist->GetBin(x[0], x[1], x[2]);
258  v = fHist->GetBinContent(histidx);
259  }
260  else v = fHn->GetBinContent(x);
261  Double_t checkx = 0.;
262  if (v)
263  for (Int_t d = 0; d < fDim; ++d)
264  checkx += x[d];
265  check += checkx * v;
266 
267  if (fgDebug > 2 || (fgDebug > 1 && v)) {
268  printf("%s%d", fDim < 4 ? "hist" : "arr", fDim);
269  for (Int_t d = 0; d < fDim; ++d)
270  printf("[%d]", x[d]);
271  printf(" = %g\n", v);
272  }
273 
274  ++x[fDim - 1];
275  // Adjust the bin idx
276  // no wrapping for dim 0 - it's what we break on!
277  for (Int_t d = fDim - 1; d > 0; --d) {
278  if (x[d] > fBins + 1) {
279  x[d] = 0;
280  ++x[d - 1];
281  }
282  }
283  ++idx;
284  } // while next bin
285  } else {
286  for (Long64_t i = 0; i < fSparse->GetNbins(); ++i) {
287  Double_t v = fSparse->GetBinContent(i, x);
288  Double_t checkx = 0.;
289  for (Int_t d = 0; d < fDim; ++d)
290  checkx += x[d];
291  check += checkx * v;
292 
293  if (fgDebug > 1) {
294  printf("sparse%d", fDim);
295  for (Int_t d = 0; d < fDim; ++d)
296  printf("[%d]", x[d]);
297  printf(" = %g\n", v);
298  }
299  }
300  }
301  check /= fNum;
302  if (fgDebug > 0)
303  printf("check %s%d = %g\n", hist == kHist ? (fDim < 4 ? "hist" : "arr") : "sparse", fDim, check);
304  return check;
305 }
306 
307 
308 void sparsehist() {
309 // Exclude this macro also for Cling as this script requires exception support
310 // which is not supported in Cling as of v6.00/00.
311 #if defined (__CLING__)
312  printf("Please run this script in compiled mode by running \".x sparsehist.C+\"\n");
313  return;
314 #endif
315 
316  TH2F* htime[TTimeHists::kNumHist][TTimeHists::kNumTime];
317  for (int h = 0; h < TTimeHists::kNumHist; ++h)
318  for (int t = 0; t < TTimeHists::kNumTime; ++t) {
319  TString name("htime_");
320  if (h == 0) name += "arr";
321  else name += "sp";
322  if (t == 0) name += "_r";
323 
324  TString title;
325  title.Form("Throughput (fill,get) %s (%s, 1M entries/sec);dim;bins;1M entries/sec", h == 0 ? "TH1/2/3/nF" : "THnSparseF", t == 0 ? "real" : "CPU");
326  htime[h][t] = new TH2F(name, title, 6, 0.5, 6.5, 10, 5, 105);
327  }
328 
329  TH2F* hsparse_mem = new TH2F("hsparse_mem", "Fractional memory usage;dim;bins;fraction of memory used", 6, 0.5, 6.5, 10, 5, 105);
330  TH2F* hsparse_bins = new TH2F("hsparse_bins", "Fractional number of used bins;dim;bins;fraction of filled bins", 6, 0.5, 6.5, 10, 5, 105);
331 
332  // TTimeHists::SetDebug(2);
333  Double_t max = -1.;
334  for (Int_t dim = 1; dim < 7; ++dim) {
335  printf("Processing dimension %d", dim);
336  for (Int_t bins = 10; bins <= 100; bins += 10) {
337  TTimeHists timer(dim, bins, /*num*/ 1000);
338  timer.Run();
339  for (int h = 0; h < TTimeHists::kNumHist; ++h) {
340  for (int t = 0; t < TTimeHists::kNumTime; ++t) {
341  Double_t time = timer.GetTime((TTimeHists::EHist)h, (TTimeHists::ETime)t);
342  if (time >= 0.)
343  htime[h][t]->Fill(dim, bins, time);
344  }
345  }
346  hsparse_mem->Fill(dim, bins, timer.GetSparse()->GetSparseFractionMem());
347  hsparse_bins->Fill(dim, bins, timer.GetSparse()->GetSparseFractionBins());
348 
349  if (max < timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal))
350  max = timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal);
351  printf(".");
352  fflush(stdout);
353  }
354  printf(" done\n");
355  }
356 
357  Double_t markersize = 2.5;
358  hsparse_mem->SetMarkerSize(markersize);
359  hsparse_bins->SetMarkerSize(markersize);
360 
361  TH2F* htime_ratio[TTimeHists::kNumTime];
362  for (int t = 0; t < TTimeHists::kNumTime; ++t) {
363  const char* name = t ? "htime_ratio" : "htime_ratio_r";
364  htime_ratio[t] = (TH2F*) htime[TTimeHists::kSparse][t]->Clone(name);
365  TString title;
366  title.Form("Relative speed improvement (%s, 1M entries/sec): sparse/hist;dim;bins;#Delta 1M entries/sec", t == 0 ? "real" : "CPU");
367  htime_ratio[t]->SetTitle(title);
368  htime_ratio[t]->Divide(htime[TTimeHists::kHist][t]);
369  htime_ratio[t]->SetMinimum(0.1);
370  htime_ratio[t]->SetMarkerSize(markersize);
371  }
372 
373  TFile* f = new TFile("sparsehist.root","RECREATE");
374 
375  TCanvas* canv= new TCanvas("c","c");
376  canv->Divide(3,3);
377 
378  gStyle->SetPalette(8,0);
379  gStyle->SetPaintTextFormat(".2g");
380  gStyle->SetOptStat(0);
381  const char* opt = "TEXT COL";
382 
383  for (int t = 0; t < TTimeHists::kNumTime; ++t) {
384  for (int h = 0; h < TTimeHists::kNumHist; ++h) {
385  htime[h][t]->SetMaximum(max);
386  htime[h][t]->SetMarkerSize(markersize);
387  canv->cd(1 + h + 3 * t);
388  htime[h][t]->Draw(opt);
389  htime[h][t]->Write();
390  }
391  canv->cd(3 + t * 3);
392  htime_ratio[t]->Draw(opt); gPad->SetLogz();
393  htime_ratio[t]->Write();
394  }
395 
396  canv->cd(7); hsparse_mem->Draw(opt);
397  canv->cd(8); hsparse_bins->Draw(opt);
398  hsparse_mem->Write();
399  hsparse_bins->Write();
400 
401  canv->Write();
402 
403  delete f;
404 }
Int_t fMemFree
Definition: TSystem.h:183
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:785
float xmin
Definition: THbookFile.cxx:93
Double_t RealTime()
Stop the stopwatch (if it is running) and return the realtime (in seconds) passed between the start a...
Definition: TStopwatch.cxx:110
long long Long64_t
Definition: RtypesCore.h:69
virtual void SetMaximum(Double_t maximum=-1111)
Definition: TH1.h:390
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
Definition: TStopwatch.cxx:58
float Float_t
Definition: RtypesCore.h:53
3-D histogram with a float per channel (see TH1 documentation)}
Definition: TH3.h:267
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition: TRandom.cxx:256
R__EXTERN TStyle * gStyle
Definition: TStyle.h:402
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:285
TH1 * h
Definition: legend2.C:5
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
TVirtualPad * cd(Int_t subpadnumber=0)
Set current canvas & pad.
Definition: TCanvas.cxx:688
virtual void SetMinimum(Double_t minimum=-1111)
Definition: TH1.h:391
Basic string class.
Definition: TString.h:125
Double_t CpuTime()
Stop the stopwatch (if it is running) and return the cputime (in seconds) passed between the start an...
Definition: TStopwatch.cxx:125
virtual int GetMemInfo(MemInfo_t *info) const
Returns ram and swap memory usage info into the MemInfo_t structure.
Definition: TSystem.cxx:2508
int Int_t
Definition: RtypesCore.h:41
void Stop()
Stop the stopwatch.
Definition: TStopwatch.cxx:77
Double_t x[n]
Definition: legend1.C:17
Efficient multidimensional histogram.
Definition: THnSparse.h:36
virtual void SetSeed(ULong_t seed=0)
Set the random generator seed.
Definition: TRandom.cxx:589
THnSparseT< TArrayF > THnSparseF
Definition: THnSparse.h:217
virtual Bool_t Divide(TF1 *f1, Double_t c1=1)
Performs the operation: this = this/(c1*f1) if errors are defined (see TH1::Sumw2), errors are also recalculated.
Definition: TH1.cxx:2724
R__EXTERN TSystem * gSystem
Definition: TSystem.h:540
virtual void Draw(Option_t *option="")
Draw this histogram with options.
Definition: TH1.cxx:2969
SVector< double, 2 > v
Definition: Dict.h:5
PyObject * fValue
2-D histogram with a float per channel (see TH1 documentation)}
Definition: TH2.h:249
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2343
Multidimensional histogram.
Definition: THn.h:36
float xmax
Definition: THbookFile.cxx:93
R__EXTERN TRandom * gRandom
Definition: TRandom.h:62
virtual void SetMarkerSize(Size_t msize=1)
Set the marker size.
Definition: TAttMarker.h:41
const Bool_t kFALSE
Definition: RtypesCore.h:88
long Long_t
Definition: RtypesCore.h:50
The Canvas class.
Definition: TCanvas.h:31
double Double_t
Definition: RtypesCore.h:55
The TH1 histogram class.
Definition: TH1.h:56
THist< 3, float, THistStatContent, THistStatUncertainty > TH3F
Definition: THist.hxx:297
virtual void Divide(Int_t nx=1, Int_t ny=1, Float_t xmargin=0.01, Float_t ymargin=0.01, Int_t color=0)
Automatic pad generation by division.
Definition: TPad.cxx:1153
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1266
#define gPad
Definition: TVirtualPad.h:285
THnT< Float_t > THnF
Definition: THn.h:246
virtual void SetTitle(const char *title)
See GetStatOverflows for more information.
Definition: TH1.cxx:6154
void SetPaintTextFormat(const char *format="g")
Definition: TStyle.h:360
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:291
const Int_t n
Definition: legend1.C:16
void SetPalette(Int_t ncolors=kBird, Int_t *colors=0, Float_t alpha=1.)
See TColor::SetPalette.
Definition: TStyle.cxx:1459
char name[80]
Definition: TGX11.cxx:109
Stopwatch class.
Definition: TStopwatch.h:28