The script calculates the bandwidth for filling and retrieving bin contents (in million entries per second) for these two histogramming techniques, where "seconds" is CPU and real time.
The first line of the plots contains the bandwidth based on the CPU time (THnSpase, TH1/2/3/nF*, ratio), the second line shows the plots for real time, and the third line shows the fraction of filled bins and memory used by THnSparse vs. TH1/2/3/nF.
The timing depends on the distribution and the amount of entries in the histograms; here, a Gaussian distribution (center is contained in the histograms) is used to fill each histogram with 1000 entries. The filling and reading is repeated until enough statistics have been collected.
This macro should be run in compiled mode due to the many nested loops that force CINT to disable its optimization. If run interpreted one would not benchmark THnSparse but CINT.
#ifndef INT_MAX
#define INT_MAX std::numeric_limits<int>::max()
#endif
class TTimeHists {
public:
enum EHist { kHist, kSparse, kNumHist };
enum ETime { kReal, kCPU, kNumTime };
fValue(0), fDim(dim), fBins(bins), fNum(num),
fSparse(0), fHist(0), fHn(0) {}
~TTimeHists();
bool Run();
Double_t GetTime(EHist hist, ETime time)
const {
if (time == kReal) return fTime[hist][0];
return fTime[hist][1]; }
static void SetDebug(
Int_t lvl) { fgDebug = lvl; }
THnSparse* GetSparse()
const {
return fSparse; }
protected:
void Fill(EHist hist);
void SetupHist(EHist hist);
void NextValues();
void SetupValues();
private:
};
Int_t TTimeHists::fgDebug = 0;
TTimeHists::~TTimeHists()
{
delete [] fValue;
delete fSparse;
delete fHist;
delete fHn;
}
bool TTimeHists::Run()
{
for (
int h = 0;
h < 2; ++
h) {
SetupValues();
try {
do {
check[
h] = Check((EHist)
h);
|| (
h && rep[0] > 0 && rep[1] < rep[0]));
fTime[
h][0] = (1.* fNum * rep[
h]) / w.
RealTime() / 1E6;
fTime[
h][1] = (1.* fNum * rep[
h]) / w.
CpuTime() / 1E6;
if (
h == 1 && (fTime[
h][0] > 1E20 || fTime[
h][1] > 1E20)) {
do {
fTime[
h][0] = (1.* fNum * rep[
h]) / w.
RealTime() / 1E6;
fTime[
h][1] = (1.* fNum * rep[
h]) / w.
CpuTime() / 1E6;
}
if (fTime[
h][0] > 1E20) fTime[
h][0] = 1E20;
if (fTime[
h][1] > 1E20) fTime[
h][1] = 1E20;
}
catch (std::exception&) {
fTime[
h][0] = fTime[
h][1] = -1.;
}
}
if (check[0] != check[1])
if (check[0] != -1.)
printf("ERROR: mismatch of histogram (%g) and sparse histogram (%g) for dim=%d, bins=%d!\n",
check[0], check[1], fDim, fBins);
return (check[0] == check[1]);
}
void TTimeHists::NextValues()
{
}
void TTimeHists::SetupValues()
{
if (!fValue) fValue =
new Double_t[fDim];
}
void TTimeHists::Fill(EHist hist)
{
NextValues();
if (fgDebug > 1) {
printf(
"%ld: fill %s",
n, hist == kHist? (fDim < 4 ?
"hist" :
"arr") :
"sparse");
printf(
"[%g]", fValue[
d]);
printf("\n");
}
if (hist == kHist) {
switch (fDim) {
case 1: fHist->Fill(fValue[0]); break;
case 2: ((
TH2F*)fHist)->Fill(fValue[0], fValue[1]);
break;
case 3: ((
TH3F*)fHist)->Fill(fValue[0], fValue[1], fValue[2]);
break;
default: fHn->Fill(fValue); break;
}
} else {
fSparse->Fill(fValue);
}
}
}
void TTimeHists::SetupHist(EHist hist)
{
if (hist == kHist) {
switch (fDim) {
case 1: fHist =
new TH1F(
"h1",
"h1", fBins, -1., 1.);
break;
case 2: fHist =
new TH2F(
"h2",
"h2", fBins, -1., 1., fBins, -1., 1.);
break;
case 3: fHist =
new TH3F(
"h3",
"h3", fBins, -1., 1., fBins, -1., 1., fBins, -1., 1.);
break;
default:
{
throw std::bad_alloc();
size *= (fBins + 2);
}
throw std::bad_alloc();
}
}
}
} else {
}
}
}
{
memset(
x, 0,
sizeof(
Int_t) * fDim);
if (hist == kHist) {
size *= (fBins + 2);
while (
x[0] <= fBins + 1) {
if (fDim < 4) {
if (fDim == 2) histidx = fHist->GetBin(
x[0],
x[1]);
else if (fDim == 3) histidx = fHist->GetBin(
x[0],
x[1],
x[2]);
v = fHist->GetBinContent(histidx);
}
else v = fHn->GetBinContent(
x);
if (fgDebug > 2 || (fgDebug > 1 &&
v)) {
printf("%s%d", fDim < 4 ? "hist" : "arr", fDim);
}
}
}
++idx;
}
} else {
for (
Long64_t i = 0; i < fSparse->GetNbins(); ++i) {
if (fgDebug > 1) {
printf("sparse%d", fDim);
}
}
}
check /= fNum;
if (fgDebug > 0)
printf("check %s%d = %g\n", hist == kHist ? (fDim < 4 ? "hist" : "arr") : "sparse", fDim, check);
return check;
}
void sparsehist() {
#if defined (__CLING__)
printf("Please run this script in compiled mode by running \".x sparsehist.C+\"\n");
return;
#endif
TH2F* htime[TTimeHists::kNumHist][TTimeHists::kNumTime];
for (
int h = 0;
h < TTimeHists::kNumHist; ++
h)
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
if (
h == 0)
name +=
"arr";
if (t == 0)
name +=
"_r";
title.
Form(
"Throughput (fill,get) %s (%s, 1M entries/sec);dim;bins;1M entries/sec",
h == 0 ?
"TH1/2/3/nF" :
"THnSparseF", t == 0 ?
"real" :
"CPU");
htime[
h][t] =
new TH2F(
name, title, 6, 0.5, 6.5, 10, 5, 105);
}
TH2F* hsparse_mem =
new TH2F(
"hsparse_mem",
"Fractional memory usage;dim;bins;fraction of memory used", 6, 0.5, 6.5, 10, 5, 105);
TH2F* hsparse_bins =
new TH2F(
"hsparse_bins",
"Fractional number of used bins;dim;bins;fraction of filled bins", 6, 0.5, 6.5, 10, 5, 105);
for (
Int_t dim = 1; dim < 7; ++dim) {
printf("Processing dimension %d", dim);
for (
Int_t bins = 10; bins <= 100; bins += 10) {
TTimeHists timer(dim, bins, 1000);
timer.Run();
for (
int h = 0;
h < TTimeHists::kNumHist; ++
h) {
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
Double_t time = timer.GetTime((TTimeHists::EHist)
h, (TTimeHists::ETime)t);
if (time >= 0.)
htime[
h][t]->
Fill(dim, bins, time);
}
}
hsparse_mem->
Fill(dim, bins, timer.GetSparse()->GetSparseFractionMem());
hsparse_bins->
Fill(dim, bins, timer.GetSparse()->GetSparseFractionBins());
if (max < timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal))
max = timer.GetTime(TTimeHists::kSparse, TTimeHists::kReal);
printf(".");
fflush(stdout);
}
printf(" done\n");
}
TH2F* htime_ratio[TTimeHists::kNumTime];
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
const char*
name = t ?
"htime_ratio" :
"htime_ratio_r";
htime_ratio[t] = (
TH2F*) htime[TTimeHists::kSparse][t]->Clone(
name);
title.
Form(
"Relative speed improvement (%s, 1M entries/sec): sparse/hist;dim;bins;#Delta 1M entries/sec", t == 0 ?
"real" :
"CPU");
htime_ratio[t]->
Divide(htime[TTimeHists::kHist][t]);
}
const char* opt = "TEXT COL";
for (int t = 0; t < TTimeHists::kNumTime; ++t) {
for (
int h = 0;
h < TTimeHists::kNumHist; ++
h) {
}
htime_ratio[t]->
Draw(opt);
gPad->SetLogz();
}
canv->
cd(7); hsparse_mem->
Draw(opt);
canv->
cd(8); hsparse_bins->
Draw(opt);
}
THnSparseT< TArrayF > THnSparseF
R__EXTERN TRandom * gRandom
R__EXTERN TStyle * gStyle
R__EXTERN TSystem * gSystem
virtual void SetMarkerSize(Size_t msize=1)
Set the marker size.
TVirtualPad * cd(Int_t subpadnumber=0) override
Set current canvas & pad.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
1-D histogram with a float per channel (see TH1 documentation)}
TH1 is the base class of all histogram classes in ROOT.
virtual void SetTitle(const char *title)
See GetStatOverflows for more information.
virtual void SetMaximum(Double_t maximum=-1111)
virtual void SetMinimum(Double_t minimum=-1111)
virtual void Draw(Option_t *option="")
Draw this histogram with options.
virtual Bool_t Divide(TF1 *f1, Double_t c1=1)
Performs the operation: this = this/(c1*f1) if errors are defined (see TH1::Sumw2),...
2-D histogram with a float per channel (see TH1 documentation)}
Int_t Fill(Double_t)
Invalid Fill method.
3-D histogram with a float per channel (see TH1 documentation)}
Efficient multidimensional histogram.
Multidimensional histogram.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
void Divide(Int_t nx=1, Int_t ny=1, Float_t xmargin=0.01, Float_t ymargin=0.01, Int_t color=0) override
Automatic pad generation by division.
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
virtual void SetSeed(ULong_t seed=0)
Set the random generator seed.
Double_t RealTime()
Stop the stopwatch (if it is running) and return the realtime (in seconds) passed between the start a...
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
Double_t CpuTime()
Stop the stopwatch (if it is running) and return the cputime (in seconds) passed between the start an...
void Stop()
Stop the stopwatch.
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
void SetPaintTextFormat(const char *format="g")
void SetPalette(Int_t ncolors=kBird, Int_t *colors=0, Float_t alpha=1.)
See TColor::SetPalette.
virtual int GetMemInfo(MemInfo_t *info) const
Returns ram and swap memory usage info into the MemInfo_t structure.