doc/v628/NeymanConstruction_8cxx_source.html

// @(#)root/roostats:$Id$

// Author: Kyle Cranmer   January 2009


/*************************************************************************

 * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers.               *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


/** \class RooStats::NeymanConstruction

    \ingroup Roostats


NeymanConstruction is a concrete implementation of the NeymanConstruction

interface that, as the name suggests, performs a NeymanConstruction. It produces

a RooStats::PointSetInterval, which is a concrete implementation of the

ConfInterval interface.


The Neyman Construction is not a uniquely defined statistical technique, it

requires that one specify an ordering rule or ordering principle, which is

usually incoded by choosing a specific test statistic and limits of integration

(corresponding to upper/lower/central limits). As a result, this class must be

configured with the corresponding information before it can produce an interval.

Common configurations, such as the Feldman-Cousins approach, can be enforced by

other light weight classes.


The Neyman Construction considers every point in the parameter space

independently, no assumptions are made that the interval is connected or of a

particular shape. As a result, the PointSetInterval class is used to represent

the result. The user indicate which points in the parameter space to perform

the construction by providing a PointSetInterval instance with the desired points.


This class is fairly light weight, because the choice of parameter points to be

considered is factorized and so is the creation of the sampling distribution of

the test statistic (which is done by a concrete class implementing the

DistributionCreator interface). As a result, this class basically just drives the

construction by:


  - using a DistributionCreator to create the SamplingDistribution of a user-

    defined test statistic for each parameter point of interest,

  - defining the acceptance region in the data by finding the thresholds on the

    test statistic such that the integral of the sampling distribution is of the

    appropriate size and consistent with the limits of integration

    (eg. upper/lower/central limits),

  - and finally updating the PointSetInterval based on whether the value of the

    test statistic evaluated on the data are in the acceptance region.


*/


#include "RooStats/NeymanConstruction.h"


#include "RooStats/RooStatsUtils.h"


#include "RooStats/PointSetInterval.h"


#include "RooStats/SamplingDistribution.h"

#include "RooStats/ToyMCSampler.h"

#include "RooStats/ModelConfig.h"


#include "RooMsgService.h"

#include "RooGlobalFunc.h"


#include "RooDataSet.h"

#include "TFile.h"

#include "TMath.h"

#include "TH1F.h"


ClassImp(RooStats::NeymanConstruction); ;


using namespace RooFit;

using namespace RooStats;

using namespace std;


////////////////////////////////////////////////////////////////////////////////

/// default constructor


NeymanConstruction::NeymanConstruction(RooAbsData& data, ModelConfig& model):

   fSize(0.05),

   fData(data),

   fModel(model),

   fTestStatSampler(0),

   fPointsToTest(0),

   fLeftSideFraction(0),

   fConfBelt(0),  // constructed with tree data

   fAdaptiveSampling(false),

   fAdditionalNToysFactor(1.),

   fSaveBeltToFile(false),

   fCreateBelt(false)


{

//   fWS = new RooWorkspace();

//   fOwnsWorkspace = true;

//   fDataName = "";

//   fPdfName = "";

}


////////////////////////////////////////////////////////////////////////////////

/// default constructor

///  if(fOwnsWorkspace && fWS) delete fWS;

///  if(fConfBelt) delete fConfBelt;


NeymanConstruction::~NeymanConstruction() {

}


////////////////////////////////////////////////////////////////////////////////

/// Main interface to get a RooStats::ConfInterval.

/// It constructs a RooStats::SetInterval.


PointSetInterval* NeymanConstruction::GetInterval() const {


  TFile* f=0;

  if(fSaveBeltToFile){

    //coverity[FORWARD_NULL]

    oocoutI(f,Contents) << "NeymanConstruction saving ConfidenceBelt to file SamplingDistributions.root" << endl;

    f = new TFile("SamplingDistributions.root","recreate");

  }


  Int_t npass = 0;

  RooArgSet* point;


  // strange problems when using snapshots.

  //  RooArgSet* fPOI = (RooArgSet*) fModel.GetParametersOfInterest()->snapshot();

  RooArgSet* fPOI = new RooArgSet(*fModel.GetParametersOfInterest());


  RooDataSet* pointsInInterval = new RooDataSet("pointsInInterval",

                   "points in interval",

                  *(fPointsToTest->get(0)) );


  // loop over points to test

  for(Int_t i=0; i<fPointsToTest->numEntries(); ++i){

     // get a parameter point from the list of points to test.

    point = (RooArgSet*) fPointsToTest->get(i);//->clone("temp");


    // set parameters of interest to current point

    fPOI->assign(*point);


    // set test stat sampler to use this point

    fTestStatSampler->SetParametersForTestStat(*fPOI);


     // get the value of the test statistic for this data set

    double thisTestStatistic = fTestStatSampler->EvaluateTestStatistic(fData, *fPOI );

    /*

    cout << "NC CHECK: " << i << endl;

    point->Print();

    fPOI->Print("v");

    fData.Print();

    cout <<"thisTestStatistic = " << thisTestStatistic << endl;

    */


    // find the lower & upper thresholds on the test statistic that

    // define the acceptance region in the data


    SamplingDistribution* samplingDist=0;

    double sigma;

    double upperEdgeOfAcceptance, upperEdgeMinusSigma, upperEdgePlusSigma;

    double lowerEdgeOfAcceptance, lowerEdgeMinusSigma, lowerEdgePlusSigma;

    Int_t additionalMC=0;


    // the adaptive sampling algorithm wants at least one toy event to be outside

    // of the requested pvalue including the sampling variation.  That leads to an equation

    // N-1 = (1-alpha)N + Z sqrt(N - (1-alpha)N) // for upper limit and

    // 1   = alpha N - Z sqrt(alpha N)  // for lower limit

    //

    // solving for N gives:

    // N = 1/alpha * [3/2 + sqrt(5)] for Z = 1 (which is used currently)

    // thus, a good guess for the first iteration of events is N=3.73/alpha~4/alpha

    // should replace alpha here by smaller tail probability: eg. alpha*Min(leftsideFrac, 1.-leftsideFrac)

    // totalMC will be incremented by 2 before first call, so initiated it at half the value

    Int_t totalMC = (Int_t) (2./fSize/TMath::Min(fLeftSideFraction,1.-fLeftSideFraction));

    if(fLeftSideFraction==0. || fLeftSideFraction ==1.){

      totalMC = (Int_t) (2./fSize);

    }

    // use control

    double tmc = double(totalMC)*fAdditionalNToysFactor;

    totalMC = (Int_t) tmc;


    ToyMCSampler* toyMCSampler = dynamic_cast<ToyMCSampler*>(fTestStatSampler);

    if(fAdaptiveSampling && toyMCSampler) {

      do{

   // this will be executed first, then while conditioned checked

   // as an exit condition for the loop.


   // the next line is where most of the time will be spent

   // generating the sampling dist of the test statistic.

   additionalMC = 2*totalMC; // grow by a factor of two

   samplingDist =

     toyMCSampler->AppendSamplingDistribution(*point,

                     samplingDist,

                     additionalMC);

        if (!samplingDist) {

           oocoutE(nullptr,Eval) << "Neyman Construction: error generating sampling distribution" << endl;

           return 0;

        }

   totalMC=samplingDist->GetSize();


   //cout << "without sigma upper = " <<

   //samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ) << endl;


   sigma = 1;

   upperEdgeOfAcceptance =

     samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ,

                sigma, upperEdgePlusSigma);

   sigma = -1;

   samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ,

              sigma, upperEdgeMinusSigma);


   sigma = 1;

   lowerEdgeOfAcceptance =

     samplingDist->InverseCDF( fLeftSideFraction * fSize ,

                sigma, lowerEdgePlusSigma);

   sigma = -1;

   samplingDist->InverseCDF( fLeftSideFraction * fSize ,

              sigma, lowerEdgeMinusSigma);


   ooccoutD(samplingDist,Eval) << "NeymanConstruction: "

        << "total MC = " << totalMC

        << " this test stat = " << thisTestStatistic << endl

        << " upper edge -1sigma = " << upperEdgeMinusSigma

        << ", upperEdge = "<<upperEdgeOfAcceptance

        << ", upper edge +1sigma = " << upperEdgePlusSigma << endl

        << " lower edge -1sigma = " << lowerEdgeMinusSigma

        << ", lowerEdge = "<<lowerEdgeOfAcceptance

        << ", lower edge +1sigma = " << lowerEdgePlusSigma << endl;

      } while((

         (thisTestStatistic <= upperEdgeOfAcceptance &&

          thisTestStatistic > upperEdgeMinusSigma)

         || (thisTestStatistic >= upperEdgeOfAcceptance &&

        thisTestStatistic < upperEdgePlusSigma)

         || (thisTestStatistic <= lowerEdgeOfAcceptance &&

        thisTestStatistic > lowerEdgeMinusSigma)

         || (thisTestStatistic >= lowerEdgeOfAcceptance &&

        thisTestStatistic < lowerEdgePlusSigma)

      ) && (totalMC < 100./fSize)

         ) ; // need ; here

    } else {

      // the next line is where most of the time will be spent

      // generating the sampling dist of the test statistic.

      samplingDist = fTestStatSampler->GetSamplingDistribution(*point);

      if (!samplingDist) {

         oocoutE(nullptr,Eval) << "Neyman Construction: error generating sampling distribution" << endl;

         return 0;

      }


      lowerEdgeOfAcceptance =

   samplingDist->InverseCDF( fLeftSideFraction * fSize );

      upperEdgeOfAcceptance =

   samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) );

    }


    // add acceptance region to ConfidenceBelt

    if(fConfBelt && fCreateBelt){

      //      cout << "conf belt set " << fConfBelt << endl;

      fConfBelt->AddAcceptanceRegion(*point, i,

                 lowerEdgeOfAcceptance,

                 upperEdgeOfAcceptance);

    }


    // printout some debug info

    ooccoutP(samplingDist,Eval) << "NeymanConstruction: Prog: "<< i+1<<"/"<<fPointsToTest->numEntries()

            << " total MC = " << samplingDist->GetSize()

            << " this test stat = " << thisTestStatistic << endl;

    ooccoutP(samplingDist,Eval) << " ";

    for (auto const *myarg : static_range_cast<RooRealVar *> (*point)){

      ooccoutP(samplingDist,Eval) << myarg->GetName() << "=" << myarg->getVal() << " ";

    }

    ooccoutP(samplingDist,Eval) << "[" << lowerEdgeOfAcceptance << ", "

             << upperEdgeOfAcceptance << "] " << " in interval = " <<

      (thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance)

         << endl << endl;


    // Check if this data is in the acceptance region

    if(thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance) {

      // if so, set this point to true

      //      fPointsToTest->add(*point, 1.);  // this behaves differently for Hist and DataSet

      pointsInInterval->add(*point);

      ++npass;

    }


    if(fSaveBeltToFile){

      //write to file

      samplingDist->Write();

      string tmpName = "hist_";

      tmpName+=samplingDist->GetName();

      TH1F* h = new TH1F(tmpName.c_str(),"",500,0.,5.);

      for(int ii=0; ii<samplingDist->GetSize(); ++ii){

   h->Fill(samplingDist->GetSamplingDistribution().at(ii) );

      }

      h->Write();

      delete h;

    }


    delete samplingDist;

    //    delete point; // from dataset

  }

  oocoutI(pointsInInterval,Eval) << npass << " points in interval" << endl;


  // create an interval based pointsInInterval

  PointSetInterval* interval

    = new PointSetInterval("ClassicalConfidenceInterval", *pointsInInterval);


  if(fSaveBeltToFile){

    //   write belt to file

    fConfBelt->Write();


    f->Close();

  }


  delete f;

  //delete data;

  return interval;

}

fSize
size_t fSize
Definition DeclareConverters.h:343

NeymanConstruction.h

PointSetInterval.h

f
#define f(i)
Definition RSha256.hxx:104

h
#define h(i)
Definition RSha256.hxx:106

RooDataSet.h

RooGlobalFunc.h

RooMsgService.h

oocoutE
#define oocoutE(o, a)
Definition RooMsgService.h:52

oocoutI
#define oocoutI(o, a)
Definition RooMsgService.h:49

ooccoutP
#define ooccoutP(o, a)
Definition RooMsgService.h:58

ooccoutD
#define ooccoutD(o, a)
Definition RooMsgService.h:56

RooStatsUtils.h

Int_t
int Int_t
Definition RtypesCore.h:45

ClassImp
#define ClassImp(name)
Definition Rtypes.h:377

SamplingDistribution.h

TFile.h

data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Definition TGWin32VirtualXProxy.cxx:104

TH1F.h

TMath.h

ToyMCSampler.h

RooAbsCollection::assign
void assign(const RooAbsCollection &other) const
Sets the value, cache and constant attribute of any argument in our set that also appears in the othe...
Definition RooAbsCollection.cxx:340

RooAbsData
RooAbsData is the common abstract base class for binned and unbinned datasets.
Definition RooAbsData.h:59

RooAbsData::get
virtual const RooArgSet * get() const
Definition RooAbsData.h:103

RooAbsData::numEntries
virtual Int_t numEntries() const
Return number of entries in dataset, i.e., count unweighted entries.
Definition RooAbsData.cxx:383

RooArgSet
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:55

RooDataSet
RooDataSet is a container class to hold unbinned data.
Definition RooDataSet.h:57

RooDataSet::add
void add(const RooArgSet &row, double weight=1.0, double weightError=0.0) override
Add one ore more rows of data.
Definition RooDataSet.cxx:1081

RooStats::ConfidenceBelt::AddAcceptanceRegion
void AddAcceptanceRegion(RooArgSet &, AcceptanceRegion region, double cl=-1., double leftside=-1.)
add after creating a region
Definition ConfidenceBelt.cxx:176

RooStats::ModelConfig
ModelConfig is a simple class that holds configuration information specifying how a model should be u...
Definition ModelConfig.h:35

RooStats::ModelConfig::GetParametersOfInterest
const RooArgSet * GetParametersOfInterest() const
get RooArgSet containing the parameter of interest (return nullptr if not existing)
Definition ModelConfig.h:258

RooStats::NeymanConstruction
NeymanConstruction is a concrete implementation of the NeymanConstruction interface that,...
Definition NeymanConstruction.h:36

RooStats::NeymanConstruction::fAdaptiveSampling
bool fAdaptiveSampling
controls use of adaptive sampling algorithm
Definition NeymanConstruction.h:132

RooStats::NeymanConstruction::fPointsToTest
RooAbsData * fPointsToTest
Definition NeymanConstruction.h:129

RooStats::NeymanConstruction::fSize
double fSize
size of the test (eg. specified rate of Type I error)
Definition NeymanConstruction.h:124

RooStats::NeymanConstruction::GetInterval
PointSetInterval * GetInterval() const override
Main interface to get a ConfInterval (will be a PointSetInterval)
Definition NeymanConstruction.cxx:111

RooStats::NeymanConstruction::fConfBelt
ConfidenceBelt * fConfBelt
Definition NeymanConstruction.h:131

RooStats::NeymanConstruction::~NeymanConstruction
~NeymanConstruction() override
default constructor if(fOwnsWorkspace && fWS) delete fWS; if(fConfBelt) delete fConfBelt;
Definition NeymanConstruction.cxx:104

RooStats::NeymanConstruction::fLeftSideFraction
double fLeftSideFraction
Definition NeymanConstruction.h:130

RooStats::NeymanConstruction::fModel
ModelConfig & fModel
Definition NeymanConstruction.h:126

RooStats::NeymanConstruction::fSaveBeltToFile
bool fSaveBeltToFile
controls use if ConfidenceBelt should be saved to a TFile
Definition NeymanConstruction.h:134

RooStats::NeymanConstruction::fData
RooAbsData & fData
data set
Definition NeymanConstruction.h:125

RooStats::NeymanConstruction::NeymanConstruction
NeymanConstruction(RooAbsData &data, ModelConfig &model)
NeymanConstruction();.
Definition NeymanConstruction.cxx:79

RooStats::NeymanConstruction::fAdditionalNToysFactor
double fAdditionalNToysFactor
give user ability to ask for more toys
Definition NeymanConstruction.h:133

RooStats::NeymanConstruction::fCreateBelt
bool fCreateBelt
controls use if ConfidenceBelt should be saved to a TFile
Definition NeymanConstruction.h:135

RooStats::NeymanConstruction::fTestStatSampler
TestStatSampler * fTestStatSampler
Definition NeymanConstruction.h:128

RooStats::PointSetInterval
PointSetInterval is a concrete implementation of the ConfInterval interface.
Definition PointSetInterval.h:21

RooStats::SamplingDistribution
This class simply holds a sampling distribution of some test statistic.
Definition SamplingDistribution.h:28

RooStats::SamplingDistribution::GetSize
Int_t GetSize() const
size of samples
Definition SamplingDistribution.h:62

RooStats::SamplingDistribution::InverseCDF
double InverseCDF(double pvalue)
get the inverse of the Cumulative distribution function
Definition SamplingDistribution.cxx:317

RooStats::SamplingDistribution::GetSamplingDistribution
const std::vector< double > & GetSamplingDistribution() const
Get test statistics values.
Definition SamplingDistribution.h:65

RooStats::TestStatSampler::SetParametersForTestStat
virtual void SetParametersForTestStat(const RooArgSet &)=0
specify the values of parameters used when evaluating test statistic

RooStats::TestStatSampler::EvaluateTestStatistic
virtual double EvaluateTestStatistic(RooAbsData &data, RooArgSet &paramsOfInterest)=0
Main interface to evaluate the test statistic on a dataset.

RooStats::TestStatSampler::GetSamplingDistribution
virtual SamplingDistribution * GetSamplingDistribution(RooArgSet &paramsOfInterest)=0
Main interface to get a ConfInterval, pure virtual.

RooStats::ToyMCSampler
ToyMCSampler is an implementation of the TestStatSampler interface.
Definition ToyMCSampler.h:67

RooStats::ToyMCSampler::AppendSamplingDistribution
virtual SamplingDistribution * AppendSamplingDistribution(RooArgSet &allParameters, SamplingDistribution *last, Int_t additionalMC)
Extended interface to append to sampling distribution more samples.
Definition ToyMCSampler.cxx:657

TFile
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition TFile.h:51

TH1F
1-D histogram with a float per channel (see TH1 documentation)}
Definition TH1.h:577

TNamed::GetName
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47

TObject::Write
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition TObject.cxx:874

double

int

sigma
const Double_t sigma
Definition h1analysisProxy.h:11

RooFit
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition Common.h:18

RooFit::Eval
@ Eval
Definition RooGlobalFunc.h:63

RooFit::Contents
@ Contents
Definition RooGlobalFunc.h:64

RooStats
Namespace for the RooStats classes.
Definition Asimov.h:19

TMath::Min
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition TMathBase.h:198

ModelConfig.h