Logo ROOT   6.18/05
Reference Guide
sparsehist.C File Reference

Detailed Description

Evaluate the performance of THnSparse vs TH1/2/3/nF for different numbers of dimensions and bins per dimension.

The script calculates the bandwidth for filling and retrieving bin contents (in million entries per second) for these two histogramming techniques, where "seconds" is CPU and real time.

The first line of the plots contains the bandwidth based on the CPU time (THnSpase, TH1/2/3/nF*, ratio), the second line shows the plots for real time, and the third line shows the fraction of filled bins and memory used by THnSparse vs. TH1/2/3/nF.

The timing depends on the distribution and the amount of entries in the histograms; here, a Gaussian distribution (center is contained in the histograms) is used to fill each histogram with 1000 entries. The filling and reading is repeated until enough statistics have been collected.

tutorials/tree/drawsparse.C shows an example for visualizing a THnSparse. It creates a TTree which is then drawn using TParallelCoord.

This macro should be run in compiled mode due to the many nested loops that force CINT to disable its optimization. If run interpreted one would not benchmark THnSparse but CINT.

Run as:

root[0] .L $ROOTSYS/tutorials/hist/sparsehist.C+
root[1] sparsehist()
#include "TH1.h"
#include "TH2.h"
#include "TH3.h"
#include "THn.h"
#include "THnSparse.h"
#include "TStopwatch.h"
#include "TRandom.h"
#include "TCanvas.h"
#include "TFile.h"
#include "TStyle.h"
#include "TSystem.h"
#ifndef INT_MAX
#define INT_MAX std::numeric_limits<int>::max()
#endif
class TTimeHists {
public:
enum EHist { kHist, kSparse, kNumHist };
enum ETime { kReal, kCPU, kNumTime };
TTimeHists(Int_t dim, Int_t bins, Long_t num):
fValue(0), fDim(dim), fBins(bins), fNum(num),
fSparse(0), fHist(0), fHn(0) {}
~TTimeHists();
bool Run();
Double_t GetTime(EHist hist, ETime time) const {
if (time == kReal) return fTime[hist][0];
return fTime[hist][1]; }
static void SetDebug(Int_t lvl) { fgDebug = lvl; }
THnSparse* GetSparse() const { return fSparse; }
protected:
void Fill(EHist hist);
Double_t Check(EHist hist);
void SetupHist(EHist hist);
void NextValues();
void SetupValues();
private:
Int_t fDim;
Int_t fBins;
Long_t fNum;
Double_t fTime[2][2];
THnSparse* fSparse;
TH1* fHist;
THn* fHn;
static Int_t fgDebug;
};
Int_t TTimeHists::fgDebug = 0;
TTimeHists::~TTimeHists()
{
delete [] fValue;
delete fSparse;
delete fHist;
delete fHn;
}
bool TTimeHists::Run()
{
// run all tests with current settings, and check for identity of content.
Double_t check[2];
Long64_t rep[2];
for (int h = 0; h < 2; ++h) {
rep[h] = 0;
SetupValues();
try {
w.Start();
SetupHist((EHist) h);
w.Stop();
do {
Fill((EHist) h);
check[h] = Check((EHist) h);
w.Stop();
++rep[h];
} while ((!h && w.RealTime() < 0.1)
|| (h && rep[0] > 0 && rep[1] < rep[0]));
fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
if (h == 1 && (fTime[h][0] > 1E20 || fTime[h][1] > 1E20)) {
do {
// some more cycles:
Fill((EHist) h);
Check((EHist) h);
w.Stop();
++rep[h];
} while (w.RealTime() < 0.1);
fTime[h][0] = (1.* fNum * rep[h]) / w.RealTime() / 1E6;
fTime[h][1] = (1.* fNum * rep[h]) / w.CpuTime() / 1E6;
}
if (fTime[h][0] > 1E20) fTime[h][0] = 1E20;
if (fTime[h][1] > 1E20) fTime[h][1] = 1E20;
}
catch (std::exception&) {
fTime[h][0] = fTime[h][1] = -1.;
check[h] = -1.; // can never be < 1 without exception
rep[h] = -1;
}
}
if (check[0] != check[1])
if (check[0] != -1.)
printf("ERROR: mismatch of histogram (%g) and sparse histogram (%g) for dim=%d, bins=%d!\n",
check[0], check[1], fDim, fBins);
// else
// printf("ERROR: cannot allocate histogram for dim=%d, bins=%d - out of memory!\n",
// fDim, fBins);
return (check[0] == check[1]);
}
void TTimeHists::NextValues()
{
for (Int_t d = 0; d < fDim; ++d)
fValue[d] = gRandom->Gaus() / 4.;
}
void TTimeHists::SetupValues()
{
// define fValue
if (!fValue) fValue = new Double_t[fDim];
}
void TTimeHists::Fill(EHist hist)
{
for (Long_t n = 0; n < fNum; ++n) {
NextValues();
if (fgDebug > 1) {
printf("%ld: fill %s", n, hist == kHist? (fDim < 4 ? "hist" : "arr") : "sparse");
for (Int_t d = 0; d < fDim; ++d)
printf("[%g]", fValue[d]);
printf("\n");
}
if (hist == kHist) {
switch (fDim) {
case 1: fHist->Fill(fValue[0]); break;
case 2: ((TH2F*)fHist)->Fill(fValue[0], fValue[1]); break;
case 3: ((TH3F*)fHist)->Fill(fValue[0], fValue[1], fValue[2]); break;
default: fHn->Fill(fValue); break;
}
} else {
fSparse->Fill(fValue);
}
}
}
void TTimeHists::SetupHist(EHist hist)
{
if (hist == kHist) {
switch (fDim) {
case 1: fHist = new TH1F("h1", "h1", fBins, -1., 1.); break;
case 2: fHist = new TH2F("h2", "h2", fBins, -1., 1., fBins, -1., 1.); break;
case 3: fHist = new TH3F("h3", "h3", fBins, -1., 1., fBins, -1., 1., fBins, -1., 1.); break;
default:
{
MemInfo_t meminfo;
gSystem->GetMemInfo(&meminfo);
Int_t size = 1;
for (Int_t d = 0; d < fDim; ++d) {
if ((Int_t)(size * sizeof(Float_t)) > INT_MAX / (fBins + 2)
|| (meminfo.fMemFree > 0
&& meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000)))
throw std::bad_alloc();
size *= (fBins + 2);
}
if (meminfo.fMemFree > 0
&& meminfo.fMemFree / 2 < (Int_t) (size * sizeof(Float_t)/1000/1000))
throw std::bad_alloc();
Int_t* bins = new Int_t[fDim];
Double_t *xmin = new Double_t[fDim];
Double_t *xmax = new Double_t[fDim];
for (Int_t d = 0; d < fDim; ++d) {
bins[d] = fBins;
xmin[d] = -1.;
xmax[d] = 1.;
}
fHn = new THnF("hn", "hn", fDim, bins, xmin, xmax);
}
}
} else {
Int_t* bins = new Int_t[fDim];
Double_t *xmin = new Double_t[fDim];
Double_t *xmax = new Double_t[fDim];
for (Int_t d = 0; d < fDim; ++d) {
bins[d] = fBins;
xmin[d] = -1.;
xmax[d] = 1.;
}
fSparse = new THnSparseF("hs", "hs", fDim, bins, xmin, xmax);
}
}
Double_t TTimeHists::Check(EHist hist)
{
// Check bin content of all bins
Double_t check = 0.;
Int_t* x = new Int_t[fDim];
memset(x, 0, sizeof(Int_t) * fDim);
if (hist == kHist) {
Long_t idx = 0;
Long_t size = 1;
for (Int_t d = 0; d < fDim; ++d)
size *= (fBins + 2);
while (x[0] <= fBins + 1) {
Double_t v = -1.;
if (fDim < 4) {
Long_t histidx = x[0];
if (fDim == 2) histidx = fHist->GetBin(x[0], x[1]);
else if (fDim == 3) histidx = fHist->GetBin(x[0], x[1], x[2]);
v = fHist->GetBinContent(histidx);
}
else v = fHn->GetBinContent(x);
Double_t checkx = 0.;
if (v)
for (Int_t d = 0; d < fDim; ++d)
checkx += x[d];
check += checkx * v;
if (fgDebug > 2 || (fgDebug > 1 && v)) {
printf("%s%d", fDim < 4 ? "hist" : "arr", fDim);
for (Int_t d = 0; d < fDim; ++d)
printf("[%d]", x[d]);
printf(" = %g\n", v);
}
++x[fDim - 1];
// Adjust the bin idx
// no wrapping for dim 0 - it's what we break on!
for (Int_t d = fDim - 1; d > 0; --d) {
if (x[d] > fBins + 1) {
x[d] = 0;
++x[d - 1];
}
}
++idx;
} // while next bin
} else {
for (Long64_t i = 0; i < fSparse->GetNbins(); ++i) {
Double_t v = fSparse->GetBinContent(i, x);
Double_t checkx = 0.;
for (Int_t d = 0; d < fDim; ++d)
checkx += x[d];
check += checkx * v;
if (fgDebug > 1) {
printf("sparse%d", fDim);
for (Int_t d = 0; d < fDim; ++d)
printf("[%d]", x[d]);
printf(" = %g\n", v);
}
}
}
check /= fNum;
if (fgDebug > 0)
printf("check %s%d = %g\n", hist == kHist ? (fDim < 4 ? "hist" : "arr") : "sparse", fDim, check);
return check;
}
void sparsehist() {
// Exclude this macro also for Cling as this script requires exception support
// which is not supported in Cling as of v6.00/00.
#if defined (__CLING__)
printf("Please run this script in compiled mode by running \".x sparsehist.C+\"\n");
return;
#endif
TH2F* htime[TTimeHists::kNumHist][TTimeHists::kNumTime];
for (int h = 0; h < TTimeHists::kNumHist; ++h)
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
TString name("htime_");
if (h == 0) name += "arr";
else name += "sp";
if (t == 0) name += "_r";
TString title;
title.Form("Throughput (fill,get) %s (%s, 1M entries/sec);dim;bins;1M entries/sec", h == 0 ? "TH1/2/3/nF" : "THnSparseF", t == 0 ? "real" : "CPU");
htime[h][t] = new TH2F(name, title, 6, 0.5, 6.5, 10, 5, 105);
}
TH2F* hsparse_mem = new TH2F("hsparse_mem", "Fractional memory usage;dim;bins;fraction of memory used", 6, 0.5, 6.5, 10, 5, 105);
TH2F* hsparse_bins = new TH2F("hsparse_bins", "Fractional number of used bins;dim;bins;fraction of filled bins", 6, 0.5, 6.5, 10, 5, 105);
// TTimeHists::SetDebug(2);
Double_t max = -1.;
for (Int_t dim = 1; dim < 7; ++dim) {
printf("Processing dimension %d", dim);
for (Int_t bins = 10; bins <= 100; bins += 10) {
TTimeHists timer(dim, bins, /*num*/ 1000);
timer.Run();
for (int h = 0; h < TTimeHists::kNumHist; ++h) {
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
Double_t time = timer.GetTime((TTimeHists::EHist)h, (TTimeHists::ETime)t);
if (time >= 0.)
htime[h][t]->Fill(dim, bins, time);
}
}
hsparse_mem->Fill(dim, bins, timer.GetSparse()->GetSparseFractionMem());
hsparse_bins->Fill(dim, bins, timer.GetSparse()->GetSparseFractionBins());
if (max < timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal))
max = timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal);
printf(".");
fflush(stdout);
}
printf(" done\n");
}
Double_t markersize = 2.5;
hsparse_mem->SetMarkerSize(markersize);
hsparse_bins->SetMarkerSize(markersize);
TH2F* htime_ratio[TTimeHists::kNumTime];
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
const char* name = t ? "htime_ratio" : "htime_ratio_r";
htime_ratio[t] = (TH2F*) htime[TTimeHists::kSparse][t]->Clone(name);
TString title;
title.Form("Relative speed improvement (%s, 1M entries/sec): sparse/hist;dim;bins;#Delta 1M entries/sec", t == 0 ? "real" : "CPU");
htime_ratio[t]->SetTitle(title);
htime_ratio[t]->Divide(htime[TTimeHists::kHist][t]);
htime_ratio[t]->SetMinimum(0.1);
htime_ratio[t]->SetMarkerSize(markersize);
}
TFile* f = new TFile("sparsehist.root","RECREATE");
TCanvas* canv= new TCanvas("c","c");
canv->Divide(3,3);
const char* opt = "TEXT COL";
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
for (int h = 0; h < TTimeHists::kNumHist; ++h) {
htime[h][t]->SetMaximum(max);
htime[h][t]->SetMarkerSize(markersize);
canv->cd(1 + h + 3 * t);
htime[h][t]->Draw(opt);
htime[h][t]->Write();
}
canv->cd(3 + t * 3);
htime_ratio[t]->Draw(opt); gPad->SetLogz();
htime_ratio[t]->Write();
}
canv->cd(7); hsparse_mem->Draw(opt);
canv->cd(8); hsparse_bins->Draw(opt);
hsparse_mem->Write();
hsparse_bins->Write();
canv->Write();
delete f;
}
SVector< double, 2 > v
Definition: Dict.h:5
PyObject * fValue
#define d(i)
Definition: RSha256.hxx:102
#define f(i)
Definition: RSha256.hxx:104
#define h(i)
Definition: RSha256.hxx:106
int Int_t
Definition: RtypesCore.h:41
const Bool_t kFALSE
Definition: RtypesCore.h:88
long Long_t
Definition: RtypesCore.h:50
double Double_t
Definition: RtypesCore.h:55
long long Long64_t
Definition: RtypesCore.h:69
float Float_t
Definition: RtypesCore.h:53
char name[80]
Definition: TGX11.cxx:109
float xmin
Definition: THbookFile.cxx:93
float xmax
Definition: THbookFile.cxx:93
THnSparseT< TArrayF > THnSparseF
Definition: THnSparse.h:220
THnT< Float_t > THnF
Definition: THn.h:246
R__EXTERN TRandom * gRandom
Definition: TRandom.h:62
R__EXTERN TStyle * gStyle
Definition: TStyle.h:406
R__EXTERN TSystem * gSystem
Definition: TSystem.h:560
#define gPad
Definition: TVirtualPad.h:286
virtual void SetMarkerSize(Size_t msize=1)
Set the marker size.
Definition: TAttMarker.h:41
The Canvas class.
Definition: TCanvas.h:31
TVirtualPad * cd(Int_t subpadnumber=0)
Set current canvas & pad.
Definition: TCanvas.cxx:693
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:48
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
The TH1 histogram class.
Definition: TH1.h:56
virtual void SetTitle(const char *title)
See GetStatOverflows for more information.
Definition: TH1.cxx:6309
virtual void SetMaximum(Double_t maximum=-1111)
Definition: TH1.h:394
virtual void SetMinimum(Double_t minimum=-1111)
Definition: TH1.h:395
virtual void Draw(Option_t *option="")
Draw this histogram with options.
Definition: TH1.cxx:2981
virtual Bool_t Divide(TF1 *f1, Double_t c1=1)
Performs the operation: this = this/(c1*f1) if errors are defined (see TH1::Sumw2),...
Definition: TH1.cxx:2736
2-D histogram with a float per channel (see TH1 documentation)}
Definition: TH2.h:248
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
3-D histogram with a float per channel (see TH1 documentation)}
Definition: TH3.h:265
Efficient multidimensional histogram.
Definition: THnSparse.h:36
Multidimensional histogram.
Definition: THn.h:36
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:785
virtual void Divide(Int_t nx=1, Int_t ny=1, Float_t xmargin=0.01, Float_t ymargin=0.01, Int_t color=0)
Automatic pad generation by division.
Definition: TPad.cxx:1166
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition: TRandom.cxx:263
virtual void SetSeed(ULong_t seed=0)
Set the random generator seed.
Definition: TRandom.cxx:597
Stopwatch class.
Definition: TStopwatch.h:28
Double_t RealTime()
Stop the stopwatch (if it is running) and return the realtime (in seconds) passed between the start a...
Definition: TStopwatch.cxx:110
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
Definition: TStopwatch.cxx:58
Double_t CpuTime()
Stop the stopwatch (if it is running) and return the cputime (in seconds) passed between the start an...
Definition: TStopwatch.cxx:125
void Stop()
Stop the stopwatch.
Definition: TStopwatch.cxx:77
Basic string class.
Definition: TString.h:131
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Definition: TString.cxx:2289
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition: TStyle.cxx:1444
void SetPaintTextFormat(const char *format="g")
Definition: TStyle.h:363
void SetPalette(Int_t ncolors=kBird, Int_t *colors=0, Float_t alpha=1.)
See TColor::SetPalette.
Definition: TStyle.cxx:1637
virtual int GetMemInfo(MemInfo_t *info) const
Returns ram and swap memory usage info into the MemInfo_t structure.
Definition: TSystem.cxx:2522
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
Int_t fMemFree
Definition: TSystem.h:183
Author
Axel.Naumann

Definition in file sparsehist.C.