134 const TString& theWeightFile) :
291 Log() << kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: "
293 <<
" --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
330 Log() << kVERBOSE <<
"interpreted option string: vRangeMethod: '"
337 Log() << kVERBOSE <<
"nEventsMin/Max, maxVIterations, initialScale: "
351 if (
IsNormalised())
Log() << kFATAL <<
"\"Normalise\" option cannot be used with PDERS; "
352 <<
"please remove the option from the configuration string, or "
353 <<
"use \"!Normalise\""
482 Log() << kFATAL <<
"GetNvar() == 0" <<
Endl;
503 Log() << kFATAL <<
"<SetVolumeElement> RMS not computed: " <<
fAverageRMS.size() <<
Endl;
505 Log() << kVERBOSE <<
"delta of var[" << (*fInputVars)[ivar]
507 <<
"\t | comp with |max - min|: " << (
GetXmax( ivar ) -
GetXmin( ivar ))
517 Log() << kFATAL <<
"<SetVolumeElement> unknown range-set mode: "
520 (*fShift)[ivar] = 0.5;
530 return ThisPDERS()->GetVolumeContentForRoot( scale );
539 v.ScaleInterval( scale );
548 std::vector<const BinarySearchTreeNode*>& events,
559#ifdef TMVA_MethodPDERS__countByHand__Debug__
564 Int_t iS = 0, iB = 0;
566 for (
UInt_t ievt_=0; ievt_<
Data()->GetNTrainingEvents(); ievt_++) {
569 for (
Int_t ivar=0; ivar<nvar; ivar++) {
571 inV = (
x > (*volume->Lower)[ivar] &&
x <= (*volume->Upper)[ivar]);
578 Log() << kVERBOSE <<
"debug: my test: " << in <<
Endl;
579 Log() << kVERBOSE <<
"debug: binTree: " << count <<
Endl <<
Endl;
587 std::vector<Double_t> *lb =
new std::vector<Double_t>(
GetNvar() );
588 for (
UInt_t ivar=0; ivar<
GetNvar(); ivar++) (*lb)[ivar] =
e.GetValue(ivar);
589 std::vector<Double_t> *ub =
new std::vector<Double_t>( *lb );
591 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
592 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
605 if (MethodPDERS_UseFindRoot) {
612 RootFinder rootFinder(
this, 0.01, 50, 200, 10 );
636 if (i_ > 50)
Log() << kWARNING <<
"warning in event: " <<
e
637 <<
": adaptive volume pre-adjustment reached "
638 <<
">50 iterations in while loop (" << i_ <<
")" <<
Endl;
646 Float_t nEventsBest = nEventsN;
649 if (nEventsN < fNEventsMin || nEventsN >
fNEventsMax) {
653 v->ScaleInterval( scale );
657 if (nEventsN > 1 && nEventsN - nEventsO != 0)
658 if (scaleN - scaleO != 0)
659 scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
670 (nEventsN >=
fNEventsMin || nEventsBest < nEventsN)) {
671 nEventsBest = nEventsN;
682 nEventsN = nEventsBest;
684 if (nEventsN < fNEventsMin-1 || nEventsN >
fNEventsMax+1)
685 Log() << kWARNING <<
"warning in event " <<
e
686 <<
": adaptive volume adjustment reached "
725 Log() << kWARNING <<
"warning in event" <<
e
726 <<
": kNN volume adjustment reached "
736 dim_normalization [ivar] = 1.0 / ((*
v.fUpper)[ivar] - (*
v.fLower)[ivar]);
739 std::vector<const BinarySearchTreeNode*> tempVector;
742 std::vector<Double_t> *distances =
new std::vector<Double_t>( kNNcount );
749 std::vector<Double_t>::iterator wsk = distances->begin();
751 std::nth_element( distances->begin(), wsk, distances->end() );
758 if (dist <= (*distances)[
fkNNMin-1])
759 tempVector.push_back( events[j] );
764 delete[] dim_normalization;
779 std::vector<const BinarySearchTreeNode*> events;
784 std::vector<Double_t> *lb =
new std::vector<Double_t>(
GetNvar() );
785 for (
UInt_t ivar=0; ivar<
GetNvar(); ivar++) (*lb)[ivar] =
e.GetValue(ivar);
787 std::vector<Double_t> *ub =
new std::vector<Double_t>( *lb );
789 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
790 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
808 std::vector<const BinarySearchTreeNode*> events;
813 std::vector<Double_t> *lb =
new std::vector<Double_t>(
GetNvar() );
814 for (
UInt_t ivar=0; ivar<
GetNvar(); ivar++) (*lb)[ivar] =
e.GetValue(ivar);
816 std::vector<Double_t> *ub =
new std::vector<Double_t>( *lb );
818 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
819 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
834 std::vector<const BinarySearchTreeNode*>& events,
Volume&
v )
838 dim_normalization [ivar] = 2 / ((*
v.fUpper)[ivar] - (*
v.fLower)[ivar]);
844 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
861 delete[] dim_normalization;
863 if (pdfSumS < 1
e-20 && pdfSumB < 1
e-20)
return 0.5;
864 if (pdfSumB < 1
e-20)
return 1.0;
865 if (pdfSumS < 1
e-20)
return 0.0;
868 return 1.0/(
r + 1.0);
875 std::vector<const BinarySearchTreeNode*>& events,
Volume&
v,
876 std::vector<Float_t>* pdfSum )
880 dim_normalization [ivar] = 2 / ((*
v.fUpper)[ivar] - (*
v.fLower)[ivar]);
888 pdfSum->push_back( 0 );
891 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
901 pdfSum->at(ivar) +=
ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight() * (*iev)->GetTargets()[ivar];
906 delete[] dim_normalization;
912 pdfSum->at(ivar) /= pdfDiv;
929 return (1 - normalized_distance);
940 return NormSinc (side_crossings * normalized_distance);
979 if (
ret != 0.0)
return ret*pdf;
1085 if (
d < 1
e-10)
return 1;
1088 Float_t err =
f*countB*countB*sumW2S +
f*countS*countS*sumW2B;
1090 if (
err < 1
e-10)
return 1;
1104 Log() << kFATAL <<
"Signal and background binary search tree not available" <<
Endl;
1116 Log() << kFATAL <<
"Could not create BinarySearchTree from XML" <<
Endl;
1118 Log() << kFATAL <<
"Could not create BinarySearchTree from XML" <<
Endl;
1198 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
1199 fout <<
"};" << std::endl;
1213 Log() <<
"PDERS is a generalization of the projective likelihood classifier " <<
Endl;
1214 Log() <<
"to N dimensions, where N is the number of input variables used." <<
Endl;
1215 Log() <<
"In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" <<
Endl;
1216 Log() <<
"(k-NN) methods. If the multidimensional PDF for signal and background" <<
Endl;
1217 Log() <<
"were known, this classifier would exploit the full information" <<
Endl;
1218 Log() <<
"contained in the input variables, and would hence be optimal. In " <<
Endl;
1219 Log() <<
"practice however, huge training samples are necessary to sufficiently " <<
Endl;
1220 Log() <<
"populate the multidimensional phase space. " <<
Endl;
1222 Log() <<
"The simplest implementation of PDERS counts the number of signal" <<
Endl;
1223 Log() <<
"and background events in the vicinity of a test event, and returns" <<
Endl;
1224 Log() <<
"a weight according to the majority species of the neighboring events." <<
Endl;
1225 Log() <<
"A more involved version of PDERS (selected by the option \"KernelEstimator\")" <<
Endl;
1226 Log() <<
"uses Kernel estimation methods to approximate the shape of the PDF." <<
Endl;
1230 Log() <<
"PDERS can be very powerful in case of strongly non-linear problems, " <<
Endl;
1231 Log() <<
"e.g., distinct islands of signal and background regions. Because of " <<
Endl;
1232 Log() <<
"the exponential growth of the phase space, it is important to restrict" <<
Endl;
1233 Log() <<
"the number of input variables (dimension) to the strictly necessary." <<
Endl;
1235 Log() <<
"Note that PDERS is a slowly responding classifier. Moreover, the necessity" <<
Endl;
1236 Log() <<
"to store the entire binary tree in memory, to avoid accessing virtual " <<
Endl;
1237 Log() <<
"memory, limits the number of training events that can effectively be " <<
Endl;
1238 Log() <<
"used to model the multidimensional PDF." <<
Endl;
1242 Log() <<
"If the PDERS response is found too slow when using the adaptive volume " <<
Endl;
1243 Log() <<
"size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" <<
Endl;
1244 Log() <<
"to reduce the number of events required in the volume, and/or to enlarge" <<
Endl;
1245 Log() <<
"the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" <<
Endl;
1246 Log() <<
"to the width (\"GaussSigma\") of the Gaussian kernel (if used)." <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
int Int_t
Signed integer 4 bytes (int).
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
bool Bool_t
Boolean (0=false, 1=true) (bool).
double Double_t
Double 8 bytes.
float Float_t
Float 4 bytes (float).
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Node for the BinarySearch or Decision Trees.
const std::vector< Float_t > & GetEventV() const
A simple Binary search tree including a volume search method.
static BinarySearchTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=262657)
re-create a new tree (decision tree or search tree) from XML
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
Class that contains all the data information.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
UInt_t GetNTargets() const
accessor to the number of targets
Float_t GetTarget(UInt_t itgt) const
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
Double_t GetXmin(Int_t ivar) const
TString GetMethodTypeName() const
Bool_t IgnoreEventsWithNegWeightsInTraining() const
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
Bool_t DoRegression() const
std::vector< Float_t > * fRegressionReturnVal
const Event * GetEvent() const
Double_t GetXmax(Int_t ivar) const
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
void SetSignalReferenceCut(Double_t cut)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Bool_t IsNormalised() const
const Event * GetTrainingEvent(Long64_t ievt) const
This is a generalization of the above Likelihood methods to $N$ dimensions, where $N$ is the number of input variables used.
void ReadWeightsFromXML(void *wghtnode) override
void DeclareOptions() override
define the options (their key words) that can be set in the option string.
void GetHelpMessage() const override
get help message text
void WriteWeightsToStream(TFile &rf) const
write training sample (TTree) to file
static MethodPDERS *& GetMethodPDERSThreadLocal()
void CreateBinarySearchTree(Types::ETreeType type)
create binary search trees for signal and background
BinarySearchTree * fBinaryTree
binary tree
std::vector< Float_t > * fDelta
size of volume
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
virtual ~MethodPDERS(void)
destructor
Int_t fkNNMin
min number of events in kNN tree
Bool_t fInitializedVolumeEle
is volume element initialized ?
MethodPDERS(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor for the PDERS method
Float_t fDeltaFrac
fraction of RMS
void GetSample(const Event &e, std::vector< const BinarySearchTreeNode * > &events, Volume *volume)
void ReadWeightsFromStream(std::istream &istr) override
read weight info from file
Double_t fMax_distance
maximum distance
void ProcessOptions() override
process the options specified by the user
Double_t fGaussSigma
size of Gauss in adaptive volume
@ kSinc3
the sinc enumerators must be consecutive and in order!
void AddWeightsXMLTo(void *parent) const override
write weights to xml file
Float_t GetError(Float_t countS, Float_t countB, Float_t sumW2S, Float_t sumW2B) const
statistical error estimate for RS estimator
BinarySearchTree * GetBinaryTree(void) const
static MethodPDERS * ThisPDERS(void)
static pointer to this object
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
Initialise the size of a volume element using a defined fraction of the volume containing all events.
void Train(void) override
this is a dummy training: the preparation work to do is the construction of the binary tree as a poin...
Double_t KernelNormalization(Double_t pdf)
Calculating the normalization factor only once (might need a reset at some point).
std::vector< Float_t > fAverageRMS
average RMS of signal and background
Int_t fkNNMax
max number of events in kNN tree
Float_t fNEventsMax
maximum number of events in adaptive volume
Float_t fScaleS
weight for signal events
Float_t fInitialScale
initial scale for adaptive volume
void RRScalc(const Event &, std::vector< Float_t > *count)
void UpdateThis()
update static this pointer
Double_t CRScalc(const Event &)
Float_t fScaleB
weight for background events
Double_t fGaussSigmaNorm
size of Gauss in adaptive volume (normalised to dimensions)
std::vector< Float_t > * fShift
volume center
void Init(void) override
default initialisation routine called by all constructors
void CalcAverages()
compute also average RMS values required for adaptive Gaussian
enum TMVA::MethodPDERS::EVolumeRangeMode fVRangeMode
void RKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &, std::vector< Float_t > *pdfSum)
normalization factors so we can work with radius 1 hyperspheres
enum TMVA::MethodPDERS::EKernelEstimator fKernelEstimator
Double_t NormSinc(Double_t x)
NormSinc.
void SetVolumeElement(void)
defines volume dimensions
const std::vector< Float_t > & GetRegressionValues() override
Double_t LanczosFilter(Int_t level, Double_t x)
Lanczos Filter.
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
PDERS can handle classification with 2 classes and regression with one or more regression-targets.
Double_t CKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &)
normalization factors so we can work with radius 1 hyperspheres
Float_t fMaxVIterations
maximum number of iterations to adapt volume size
Double_t ApplyKernelFunction(Double_t normalized_distance)
from the normalized euclidean distance calculate the distance for a certain kernel
Bool_t fNormTree
binary-search tree is normalised
Double_t GetNormalizedDistance(const TMVA::Event &base_event, const BinarySearchTreeNode &sample_event, Double_t *dim_normalization)
We use the Euclidean metric here. Might not be best or most efficient.
static Double_t IGetVolumeContentForRoot(Double_t)
Interface to RootFinder.
Float_t fNEventsMin
minimum number of events in adaptive volume
Double_t GetVolumeContentForRoot(Double_t)
count number of events in rescaled volume
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Singleton class for Global types used by TMVA.
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Volume for BinarySearchTree.
void ScaleInterval(Double_t f)
"scale" the volume by symmetrically blowing up the interval in each dimension
const Bool_t MethodPDERS_UseFindRoot
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculates a gaussian function with mean and sigma.
Double_t Sqrt(Double_t x)
Returns the square root of x.
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Returns x raised to the power y.
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Double_t Sin(Double_t)
Returns the sine of an angle of x radians.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.