105 fVRangeMode(kAdaptive),
106 fKernelEstimator(
kBox),
119 fInitializedVolumeEle(0),
137 fVRangeMode(kAdaptive),
138 fKernelEstimator(
kBox),
151 fInitializedVolumeEle(0),
183 fVRangeMode = kAdaptive;
184 fKernelEstimator =
kBox;
189 fMaxVIterations = 150;
190 fInitialScale = 0.99;
194 fkNNMin =
Int_t(fNEventsMin);
195 fkNNMax =
Int_t(fNEventsMax);
197 fInitializedVolumeEle =
kFALSE;
201 SetSignalReferenceCut( 0.0 );
209 if (fDelta)
delete fDelta;
210 if (fShift)
delete fShift;
212 if (
NULL != fBinaryTree)
delete fBinaryTree;
253 DeclareOptionRef(fVolumeRange=
"Adaptive",
"VolumeRangeMode",
"Method to determine volume size");
254 AddPreDefVal(
TString(
"Unscaled"));
255 AddPreDefVal(
TString(
"MinMax"));
257 AddPreDefVal(
TString(
"Adaptive"));
260 DeclareOptionRef(fKernelString=
"Box",
"KernelEstimator",
"Kernel estimation function");
262 AddPreDefVal(
TString(
"Sphere"));
263 AddPreDefVal(
TString(
"Teepee"));
264 AddPreDefVal(
TString(
"Gauss"));
265 AddPreDefVal(
TString(
"Sinc3"));
266 AddPreDefVal(
TString(
"Sinc5"));
267 AddPreDefVal(
TString(
"Sinc7"));
268 AddPreDefVal(
TString(
"Sinc9"));
269 AddPreDefVal(
TString(
"Sinc11"));
270 AddPreDefVal(
TString(
"Lanczos2"));
271 AddPreDefVal(
TString(
"Lanczos3"));
272 AddPreDefVal(
TString(
"Lanczos5"));
273 AddPreDefVal(
TString(
"Lanczos8"));
276 DeclareOptionRef(fDeltaFrac ,
"DeltaFrac",
"nEventsMin/Max for minmax and rms volume range");
277 DeclareOptionRef(fNEventsMin ,
"NEventsMin",
"nEventsMin for adaptive volume range");
278 DeclareOptionRef(fNEventsMax ,
"NEventsMax",
"nEventsMax for adaptive volume range");
279 DeclareOptionRef(fMaxVIterations,
"MaxVIterations",
"MaxVIterations for adaptive volume range");
280 DeclareOptionRef(fInitialScale ,
"InitialScale",
"InitialScale for adaptive volume range");
281 DeclareOptionRef(fGaussSigma ,
"GaussSigma",
"Width (wrt volume size) of Gaussian kernel estimator");
282 DeclareOptionRef(fNormTree ,
"NormTree",
"Normalize binary search tree");
290 if (IgnoreEventsWithNegWeightsInTraining()) {
291 Log() << kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: "
292 << GetMethodTypeName()
293 <<
" --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
297 fGaussSigmaNorm = fGaussSigma;
301 if (fVolumeRange ==
"MinMax" ) fVRangeMode = kMinMax;
302 else if (fVolumeRange ==
"RMS" ) fVRangeMode = kRMS;
303 else if (fVolumeRange ==
"Adaptive" ) fVRangeMode = kAdaptive;
304 else if (fVolumeRange ==
"Unscaled" ) fVRangeMode = kUnscaled;
305 else if (fVolumeRange ==
"kNN" ) fVRangeMode = kkNN;
307 Log() << kFATAL <<
"VolumeRangeMode parameter '" << fVolumeRange <<
"' unknown" <<
Endl;
310 if (fKernelString ==
"Box" ) fKernelEstimator =
kBox;
311 else if (fKernelString ==
"Sphere" ) fKernelEstimator = kSphere;
312 else if (fKernelString ==
"Teepee" ) fKernelEstimator = kTeepee;
313 else if (fKernelString ==
"Gauss" ) fKernelEstimator =
kGauss;
314 else if (fKernelString ==
"Sinc3" ) fKernelEstimator = kSinc3;
315 else if (fKernelString ==
"Sinc5" ) fKernelEstimator = kSinc5;
316 else if (fKernelString ==
"Sinc7" ) fKernelEstimator = kSinc7;
317 else if (fKernelString ==
"Sinc9" ) fKernelEstimator = kSinc9;
318 else if (fKernelString ==
"Sinc11" ) fKernelEstimator = kSinc11;
319 else if (fKernelString ==
"Lanczos2" ) fKernelEstimator = kLanczos2;
320 else if (fKernelString ==
"Lanczos3" ) fKernelEstimator = kLanczos3;
321 else if (fKernelString ==
"Lanczos5" ) fKernelEstimator = kLanczos5;
322 else if (fKernelString ==
"Lanczos8" ) fKernelEstimator = kLanczos8;
323 else if (fKernelString ==
"Trim" ) fKernelEstimator = kTrim;
325 Log() << kFATAL <<
"KernelEstimator parameter '" << fKernelString <<
"' unknown" <<
Endl;
330 Log() << kVERBOSE <<
"interpreted option string: vRangeMethod: '"
331 << (
const char*)((fVRangeMode == kMinMax) ?
"MinMax" :
332 (fVRangeMode == kUnscaled) ?
"Unscaled" :
333 (fVRangeMode == kRMS ) ?
"RMS" :
"Adaptive") <<
"'" <<
Endl;
334 if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
335 Log() << kVERBOSE <<
"deltaFrac: " << fDeltaFrac <<
Endl;
337 Log() << kVERBOSE <<
"nEventsMin/Max, maxVIterations, initialScale: "
338 << fNEventsMin <<
" " << fNEventsMax
339 <<
" " << fMaxVIterations <<
" " << fInitialScale <<
Endl;
340 Log() << kVERBOSE <<
"KernelEstimator = " << fKernelString <<
Endl;
351 if (IsNormalised()) Log() << kFATAL <<
"\"Normalise\" option cannot be used with PDERS; "
352 <<
"please remove the option from the configuration string, or "
353 <<
"use \"!Normalise\""
361 fInitializedVolumeEle =
kTRUE;
371 if (fInitializedVolumeEle ==
kFALSE) {
372 fInitializedVolumeEle =
kTRUE;
384 return this->CRScalc( *GetEvent() );
391 if (fRegressionReturnVal == 0) fRegressionReturnVal =
new std::vector<Float_t>;
392 fRegressionReturnVal->clear();
395 if (fInitializedVolumeEle ==
kFALSE) {
396 fInitializedVolumeEle =
kTRUE;
407 this->RRScalc( *
ev, fRegressionReturnVal );
411 for (std::vector<Float_t>::iterator it = fRegressionReturnVal->begin(); it != fRegressionReturnVal->end(); ++it ) {
416 const Event*
evT2 = GetTransformationHandler().InverseTransform(
evT );
417 fRegressionReturnVal->clear();
420 fRegressionReturnVal->push_back(
evT2->GetTarget(
ivar));
426 return (*fRegressionReturnVal);
434 if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN ) {
436 fBinaryTree->CalcStatistics();
439 if (!DoRegression()){
442 fAverageRMS.push_back( (
rmsS +
rmsB)*0.5 );
445 fAverageRMS.push_back(
rms );
456 if (
NULL != fBinaryTree)
delete fBinaryTree;
459 fBinaryTree->SetNormalize(
kTRUE );
462 fBinaryTree->Fill( GetEventCollection(
type) );
465 fBinaryTree->NormalizeTree();
468 if (!DoRegression()) {
473 Log() << kVERBOSE <<
"Signal and background scales: " << fScaleS <<
" " << fScaleB <<
Endl;
482 Log() << kFATAL <<
"GetNvar() == 0" <<
Endl;
487 fkNNMin =
Int_t(fNEventsMin);
488 fkNNMax =
Int_t(fNEventsMax);
490 if (fDelta)
delete fDelta;
491 if (fShift)
delete fShift;
492 fDelta =
new std::vector<Float_t>( GetNvar() );
493 fShift =
new std::vector<Float_t>( GetNvar() );
496 switch (fVRangeMode) {
502 if (fAverageRMS.size() != GetNvar())
503 Log() << kFATAL <<
"<SetVolumeElement> RMS not computed: " << fAverageRMS.size() <<
Endl;
504 (*fDelta)[
ivar] = fAverageRMS[
ivar]*fDeltaFrac;
505 Log() << kVERBOSE <<
"delta of var[" << (*fInputVars)[
ivar]
506 <<
"\t]: " << fAverageRMS[
ivar]
507 <<
"\t | comp with |max - min|: " << (GetXmax(
ivar ) - GetXmin(
ivar ))
511 (*fDelta)[
ivar] = (GetXmax(
ivar ) - GetXmin(
ivar ))*fDeltaFrac;
514 (*fDelta)[
ivar] = fDeltaFrac;
517 Log() << kFATAL <<
"<SetVolumeElement> unknown range-set mode: "
518 << fVRangeMode <<
Endl;
520 (*fShift)[
ivar] = 0.5;
530 return ThisPDERS()->GetVolumeContentForRoot(
scale );
541 Double_t count = GetBinaryTree()->SearchVolume( &
v );
548 std::vector<const BinarySearchTreeNode*>& events,
559#ifdef TMVA_MethodPDERS__countByHand__Debug__
562 count = fBinaryTree->SearchVolume( volume );
571 inV = (
x > (*volume->Lower)[
ivar] &&
x <= (*volume->Upper)[
ivar]);
578 Log() << kVERBOSE <<
"debug: my test: " <<
in <<
Endl;
579 Log() << kVERBOSE <<
"debug: binTree: " << count <<
Endl <<
Endl;
585 if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled) {
587 std::vector<Double_t> *
lb =
new std::vector<Double_t>( GetNvar() );
589 std::vector<Double_t> *
ub =
new std::vector<Double_t>( *
lb );
597 fBinaryTree->SearchVolume(
svolume, &events );
599 else if (fVRangeMode == kAdaptive) {
605 if (MethodPDERS_UseFindRoot) {
609 fHelpVolume = volume;
617 fBinaryTree->SearchVolume( volume, &events );
625 count = fBinaryTree->SearchVolume( volume );
632 count = fBinaryTree->SearchVolume( volume );
636 if (
i_ > 50) Log() << kWARNING <<
"warning in event: " <<
e
637 <<
": adaptive volume pre-adjustment reached "
638 <<
">50 iterations in while loop (" <<
i_ <<
")" <<
Endl;
653 v->ScaleInterval(
scale );
654 nEventsN = fBinaryTree->SearchVolume(
v );
685 Log() << kWARNING <<
"warning in event " <<
e
686 <<
": adaptive volume adjustment reached "
687 <<
"max. #iterations (" << fMaxVIterations <<
")"
688 <<
"[ nEvents: " <<
nEventsN <<
" " << fNEventsMin <<
" " << fNEventsMax <<
"]"
692 fBinaryTree->SearchVolume( volume, &events );
697 }
else if (fVRangeMode == kkNN) {
702 Int_t kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &
v, &events, fkNNMax+1 );
720 kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &
v, &events, fkNNMax+1 );
724 if (
t_times == fMaxVIterations) {
725 Log() << kWARNING <<
"warning in event" <<
e
726 <<
": kNN volume adjustment reached "
727 <<
"max. #iterations (" << fMaxVIterations <<
")"
728 <<
"[ kNN: " << fkNNMin <<
" " << fkNNMax <<
Endl;
739 std::vector<const BinarySearchTreeNode*>
tempVector;
761 fMax_distance = (*distances)[fkNNMin-1];
770 Log() << kFATAL <<
"<GetSample> unknown RangeMode: " << fVRangeMode <<
Endl;
779 std::vector<const BinarySearchTreeNode*> events;
784 std::vector<Double_t> *
lb =
new std::vector<Double_t>( GetNvar() );
787 std::vector<Double_t> *
ub =
new std::vector<Double_t>( *
lb );
795 GetSample(
e, events, volume );
796 Double_t count = CKernelEstimate(
e, events, *volume );
808 std::vector<const BinarySearchTreeNode*> events;
813 std::vector<Double_t> *
lb =
new std::vector<Double_t>( GetNvar() );
816 std::vector<Double_t> *
ub =
new std::vector<Double_t>( *
lb );
823 GetSample(
e, events, volume );
824 RKernelEstimate(
e, events, *volume, count );
834 std::vector<const BinarySearchTreeNode*>& events,
Volume&
v )
844 for (std::vector<const BinarySearchTreeNode*>::iterator
iev = events.begin();
iev != events.end(); ++
iev) {
853 if ( (*iev)->GetClass()==fSignalClass )
868 return 1.0/(
r + 1.0);
875 std::vector<const BinarySearchTreeNode*>& events,
Volume&
v,
876 std::vector<Float_t>*
pdfSum )
891 for (std::vector<const BinarySearchTreeNode*>::iterator
iev = events.begin();
iev != events.end(); ++
iev) {
923 switch (fKernelEstimator) {
962 Log() << kFATAL <<
"Kernel estimation function unsupported. Enumerator is " << fKernelEstimator <<
Endl;
979 if (
ret != 0.0)
return ret*pdf;
982 switch (fKernelEstimator) {
1008 Log() << kFATAL <<
"Kernel estimation function unsupported. Enumerator is " << fKernelEstimator <<
Endl;
1085 if (
d < 1
e-10)
return 1;
1090 if (err < 1
e-10)
return 1;
1102 fBinaryTree->AddXMLTo(
wght);
1104 Log() << kFATAL <<
"Signal and background binary search tree not available" <<
Endl;
1112 if (
NULL != fBinaryTree)
delete fBinaryTree;
1116 Log() << kFATAL <<
"Could not create BinarySearchTree from XML" <<
Endl;
1118 Log() << kFATAL <<
"Could not create BinarySearchTree from XML" <<
Endl;
1119 fBinaryTree->SetPeriode( GetNvar() );
1120 fBinaryTree->CalcStatistics();
1121 fBinaryTree->CountNodes();
1128 Log() << kINFO <<
"signal and background scales: " << fScaleS <<
" " << fScaleB <<
Endl;
1131 fInitializedVolumeEle =
kTRUE;
1139 if (
NULL != fBinaryTree)
delete fBinaryTree;
1143 istr >> *fBinaryTree;
1145 fBinaryTree->SetPeriode( GetNvar() );
1147 fBinaryTree->CalcStatistics();
1149 fBinaryTree->CountNodes();
1155 Log() << kINFO <<
"signal and background scales: " << fScaleS <<
" " << fScaleB <<
Endl;
1161 fInitializedVolumeEle =
kTRUE;
1183 return GetMethodPDERSThreadLocal();
1190 GetMethodPDERSThreadLocal() =
this;
1198 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
1199 fout <<
"};" << std::endl;
1213 Log() <<
"PDERS is a generalization of the projective likelihood classifier " <<
Endl;
1214 Log() <<
"to N dimensions, where N is the number of input variables used." <<
Endl;
1215 Log() <<
"In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" <<
Endl;
1216 Log() <<
"(k-NN) methods. If the multidimensional PDF for signal and background" <<
Endl;
1217 Log() <<
"were known, this classifier would exploit the full information" <<
Endl;
1218 Log() <<
"contained in the input variables, and would hence be optimal. In " <<
Endl;
1219 Log() <<
"practice however, huge training samples are necessary to sufficiently " <<
Endl;
1220 Log() <<
"populate the multidimensional phase space. " <<
Endl;
1222 Log() <<
"The simplest implementation of PDERS counts the number of signal" <<
Endl;
1223 Log() <<
"and background events in the vicinity of a test event, and returns" <<
Endl;
1224 Log() <<
"a weight according to the majority species of the neighboring events." <<
Endl;
1225 Log() <<
"A more involved version of PDERS (selected by the option \"KernelEstimator\")" <<
Endl;
1226 Log() <<
"uses Kernel estimation methods to approximate the shape of the PDF." <<
Endl;
1230 Log() <<
"PDERS can be very powerful in case of strongly non-linear problems, " <<
Endl;
1231 Log() <<
"e.g., distinct islands of signal and background regions. Because of " <<
Endl;
1232 Log() <<
"the exponential growth of the phase space, it is important to restrict" <<
Endl;
1233 Log() <<
"the number of input variables (dimension) to the strictly necessary." <<
Endl;
1235 Log() <<
"Note that PDERS is a slowly responding classifier. Moreover, the necessity" <<
Endl;
1236 Log() <<
"to store the entire binary tree in memory, to avoid accessing virtual " <<
Endl;
1237 Log() <<
"memory, limits the number of training events that can effectively be " <<
Endl;
1238 Log() <<
"used to model the multidimensional PDF." <<
Endl;
1242 Log() <<
"If the PDERS response is found too slow when using the adaptive volume " <<
Endl;
1243 Log() <<
"size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" <<
Endl;
1244 Log() <<
"to reduce the number of events required in the volume, and/or to enlarge" <<
Endl;
1245 Log() <<
"the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" <<
Endl;
1246 Log() <<
"to the width (\"GaussSigma\") of the Gaussian kernel (if used)." <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
bool Bool_t
Boolean (0=false, 1=true) (bool)
int Int_t
Signed integer 4 bytes (int)
float Float_t
Float 4 bytes (float)
double Double_t
Double 8 bytes.
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void pix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
const_iterator begin() const
const_iterator end() const
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Node for the BinarySearch or Decision Trees.
A simple Binary search tree including a volume search method.
static BinarySearchTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=262657)
re-create a new tree (decision tree or search tree) from XML
Class that contains all the data information.
Virtual base Class for all MVA method.
This is a generalization of the above Likelihood methods to $N$ dimensions, where $N$ is the number of input variables used.
void ReadWeightsFromXML(void *wghtnode) override
void DeclareOptions() override
define the options (their key words) that can be set in the option string.
void GetHelpMessage() const override
get help message text
void WriteWeightsToStream(TFile &rf) const
write training sample (TTree) to file
void CreateBinarySearchTree(Types::ETreeType type)
create binary search trees for signal and background
BinarySearchTree * fBinaryTree
binary tree
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
virtual ~MethodPDERS(void)
destructor
MethodPDERS(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor for the PDERS method
void GetSample(const Event &e, std::vector< const BinarySearchTreeNode * > &events, Volume *volume)
void ReadWeightsFromStream(std::istream &istr) override
read weight info from file
void ProcessOptions() override
process the options specified by the user
void AddWeightsXMLTo(void *parent) const override
write weights to xml file
Float_t GetError(Float_t countS, Float_t countB, Float_t sumW2S, Float_t sumW2B) const
statistical error estimate for RS estimator
static MethodPDERS * ThisPDERS(void)
static pointer to this object
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
init the size of a volume element using a defined fraction of the volume containing the entire events
void Train(void) override
this is a dummy training: the preparation work to do is the construction of the binary tree as a poin...
Double_t KernelNormalization(Double_t pdf)
Calculating the normalization factor only once (might need a reset at some point).
void RRScalc(const Event &, std::vector< Float_t > *count)
void UpdateThis()
update static this pointer
Double_t CRScalc(const Event &)
void Init(void) override
default initialisation routine called by all constructors
void CalcAverages()
compute also average RMS values required for adaptive Gaussian
void RKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &, std::vector< Float_t > *pdfSum)
normalization factors so we can work with radius 1 hyperspheres
Double_t NormSinc(Double_t x)
NormSinc.
void SetVolumeElement(void)
defines volume dimensions
const std::vector< Float_t > & GetRegressionValues() override
Double_t LanczosFilter(Int_t level, Double_t x)
Lanczos Filter.
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
PDERS can handle classification with 2 classes and regression with one or more regression-targets.
Double_t CKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &)
normalization factors so we can work with radius 1 hyperspheres
Double_t ApplyKernelFunction(Double_t normalized_distance)
from the normalized euclidean distance calculate the distance for a certain kernel
Double_t GetNormalizedDistance(const TMVA::Event &base_event, const BinarySearchTreeNode &sample_event, Double_t *dim_normalization)
We use the Euclidean metric here. Might not be best or most efficient.
static Double_t IGetVolumeContentForRoot(Double_t)
Interface to RootFinder.
Double_t GetVolumeContentForRoot(Double_t)
count number of events in rescaled volume
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Singleton class for Global types used by TMVA.
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Volume for BinarySearchTree.
void ScaleInterval(Double_t f)
"scale" the volume by symmetrically blowing up the interval in each dimension
const Bool_t MethodPDERS_UseFindRoot
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculates a gaussian function with mean and sigma.
Double_t Sqrt(Double_t x)
Returns the square root of x.
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Returns x raised to the power y.
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Double_t Sin(Double_t)
Returns the sine of an angle of x radians.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.