33#ifndef ROOT_TMVA_RuleFitParams
34#define ROOT_TMVA_RuleFitParams
73 fGDTauMin = (t0>1.0 ? 1.0:(t0<0.0 ? 0.0:t0));
130 typedef std::vector<const TMVA::Event *>::const_iterator
EventItr;
173 std::vector<Double_t> &avsel,
174 std::vector<Double_t> &avrul);
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t np
Bool_t operator()(Double_t first, Double_t second) const
ostringstream derivative to redirect and format output
A class doing the actual fitting of a linear model using rules as base functions.
void CalcTstAverageResponse()
Calculate the average response for all test paths - TODO: see comment under CalcAverageResponse(). Note that 0 ...
void MakeGDPath()
The following finds the gradient directed path in parameter space.
std::vector< std::vector< Double_t > > fGDCoefLinTst
linear coeffs - one per tau
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
RuleFit * fRuleFit
rule fit
Double_t fGDTau
selected threshold parameter (tau in eq 26, ref 1)
Double_t fNTCoefRad
GD path: 'radius' of all rulecoeffs.
Double_t * fNTLinCoeff
GD path: linear coefficients.
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
Int_t fGDNPathSteps
number of path steps
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
Double_t RiskPerf(UInt_t itau) const
Double_t Optimism()
implementation of eq.
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
UInt_t fPerfIdx2
last event index for performance evaluation
void SetGDPathStep(Double_t s)
std::vector< Double_t > fAverageSelectorPath
average of each variable over the range fPathIdx1,2
void EvaluateAveragePerf()
UInt_t fGDTauScan
number scan for tau-paths
std::vector< Double_t > fGradVecLin
gradient vector - dimension = number of variables
Double_t fsigrms
Rms of F(sig)
Double_t fNTRisk
GD path: risk.
virtual ~RuleFitParams()
destructor
RuleFitParams()
constructor
void SetRuleFit(RuleFit *rf)
Double_t * fNTCoeff
GD path: rule coefficients.
std::vector< const TMVA::Event * >::const_iterator EventItr
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO: rewrite bad dependency on EvaluateAverage()!
std::vector< Double_t > fAverageRulePerf
average of each rule, same range
Double_t fNEveEffPerf
idem for Perf events
void SetMsgType(EMsgType t)
Double_t Penalty() const
This is the "lasso" penalty, to be used for regression.
std::vector< std::vector< Double_t > > fGradVecLinTst
gradient vector, linear terms - one per tau
void SetGDTauRange(Double_t t0, Double_t t1)
Double_t fNEveEffPath
sum of weights for Path events
std::vector< std::vector< Double_t > > fGDCoefTst
rule coeffs - one per tau
Double_t RiskPath() const
MsgLogger * fLogger
! message logger
void FillCoefficients()
helper function to store the rule coefficients in local arrays
std::vector< Double_t > fGDTauVec
the tau's
Double_t fsigave
Average of current signal score function F(sig)
void SetGDErrScale(Double_t s)
UInt_t fPathIdx2
last event index for path search
void InitGD()
Initialize GD path search.
UInt_t fPathIdx1
first event index for path search
void EvaluateAveragePath()
Double_t fNTNuval
GD path: value of nu.
TTree * fGDNtuple
Gradient path ntuple, contains params for each step along the path.
std::vector< Double_t > fGDErrTst
error rates per tau
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
void SetGDTauPrec(Double_t p)
std::vector< Double_t > fGradVec
gradient vector - dimension = number of rules in ensemble
std::vector< Double_t > fAverageSelectorPerf
average of each variable over the range fPerfIdx1,2
Double_t fGDTauMax
max threshold parameter (tau in eq 26, ref 1)
std::vector< std::vector< Double_t > > fGradVecTst
gradient vector - one per tau
std::vector< Char_t > fGDErrTstOK
error rate is sufficiently low <— stores boolean
RuleEnsemble * fRuleEnsemble
rule ensemble
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
UInt_t GetPathIdx2() const
void MakeTstGradientVector()
Make test gradient vector for all tau; same algorithm as MakeGradientVector().
UInt_t GetPerfIdx2() const
Double_t fbkgave
Average of F(bkg)
Double_t CalcAverageTruth()
calculate the average truth
void SetGDTauScan(UInt_t n)
Double_t fFstarMedian
median value of F*() using
Double_t fbkgrms
Rms of F(bkg)
Double_t fAverageTruth
average truth, ie sum(y)/N, y=+-1
std::vector< Double_t > fFstar
vector of F*() - filled in CalcFStar()
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus. TODO: do not need inde...
Double_t fGDErrScale
stop scan at error = scale*errmin
Double_t RiskPerf() const
void MakeGradientVector()
make gradient vector
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
Double_t fGDTauMin
min threshold parameter (tau in eq 26, ref 1)
UInt_t fNLinear
number of linear terms
UInt_t GetPerfIdx1() const
void InitNtuple()
initializes the ntuple
UInt_t fPerfIdx1
first event index for performance evaluation
Double_t CalcAverageResponseOLD()
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters. It uses a binary estimate of (y-F*(x)) (y...
void SetGDTau(Double_t t)
Double_t fGDTauPrec
precision in tau
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters. This code is pretty messy at the moment.
std::vector< Double_t > fGDOfsTst
offset per tau
Double_t fNTOffset
GD path: model offset.
UInt_t fGDNTau
number of tau-paths - calculated in SetGDTauPrec
UInt_t GetPathIdx1() const
Double_t fGDPathStep
step size along path (delta nu in eq 22, ref 1)
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
UInt_t fNRules
number of rules
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
Estimates the error rate with the current set of parameters.
UInt_t fGDNTauTstOK
number of tau in the test-phase that are ok
Double_t fNTErrorRate
GD path: error rate (or performance)
std::vector< Double_t > fAverageRulePath
average of each rule, same range
void SetGDNPathSteps(Int_t np)
A class implementing various fits of rule ensembles.
A TTree represents a columnar dataset.
create variable transformations
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.