Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodPDERS.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Yair Mahalalel, Joerg Stelzer, Helge Voss, Kai Voss
3
4/***********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodPDERS *
8 * *
9 * *
10 * Description: *
11 * Implementation *
12 * *
13 * Authors (alphabetical): *
14 * Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Poland *
15 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16 * Kamil Kraszewski <kalq@cern.ch> - IFJ PAN & UJ, Poland *
17 * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18 * Yair Mahalalel <Yair.Mahalalel@cern.ch> - CERN, Switzerland *
19 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21 * *
22 * Copyright (c) 2005: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * *
27 * Redistribution and use in source and binary forms, with or without *
28 * modification, are permitted according to the terms listed in LICENSE *
29 * (see tmva/doc/LICENSE) *
30 ***********************************************************************************/
31
32/*! \class TMVA::MethodPDERS
33\ingroup TMVA
34
35This is a generalization of the above Likelihood methods to \f$ N_{var} \f$
36dimensions, where \f$ N_{var} \f$ is the number of input variables
37used in the MVA. If the multi-dimensional probability density functions
38(PDFs) for signal and background were known, this method contains the entire
39physical information, and is therefore optimal. Usually, kernel estimation
40methods are used to approximate the PDFs using the events from the
41training sample.
42
43A very simple probability density estimator (PDE) has been suggested
44in [hep-ex/0211019](http://arxiv.org/abs/hep-ex/0211019). The
45PDE for a given test event is obtained from counting the (normalized)
46number of signal and background (training) events that occur in the
47"vicinity" of the test event. The volume that describes "vicinity" is
48user-defined. A [search method based on binary-trees](http://arxiv.org/abs/hep-ex/0211019)
49is used to effectively reduce the
50selection time for the range search. Three different volume definitions
51are optional:
52
53 - *MinMax:* the volume is defined in each dimension with respect
54 to the full variable range found in the training sample.
55 - *RMS:* the volume is defined in each dimensions with respect
56 to the RMS estimated from the training sample.
57 - *Adaptive:* a volume element is defined in each dimensions with
58 respect to the RMS estimated from the training sample. The overall
59 scale of the volume element is then determined for each event so
60 that the total number of events confined in the volume be within
61 a user-defined range.
62
63The adaptive range search is used by default.
64*/
65
66#include "TMVA/MethodPDERS.h"
67
68#include "TMVA/BinaryTree.h"
70#include "TMVA/Configurable.h"
72#include "TMVA/Event.h"
73#include "TMVA/IMethod.h"
74#include "TMVA/MethodBase.h"
75#include "TMVA/MsgLogger.h"
76#include "TMVA/RootFinder.h"
77#include "TMVA/Tools.h"
79#include "TMVA/Types.h"
80
81#include "ThreadLocalStorage.h"
82#include "TFile.h"
83#include "TMath.h"
84
85#include <cassert>
86#include <algorithm>
87
88namespace TMVA {
90};
91
92
94
95
96////////////////////////////////////////////////////////////////////////////////
97/// standard constructor for the PDERS method
98
100 const TString& methodTitle,
103 MethodBase( jobName, Types::kPDERS, methodTitle, theData, theOption),
104 fFcnCall(0),
105 fVRangeMode(kAdaptive),
106 fKernelEstimator(kBox),
107 fDelta(0),
108 fShift(0),
109 fScaleS(0),
110 fScaleB(0),
111 fDeltaFrac(0),
112 fGaussSigma(0),
113 fGaussSigmaNorm(0),
114 fNRegOut(0),
115 fNEventsMin(0),
116 fNEventsMax(0),
117 fMaxVIterations(0),
118 fInitialScale(0),
119 fInitializedVolumeEle(0),
120 fkNNMin(0),
121 fkNNMax(0),
122 fMax_distance(0),
123 fPrinted(0),
124 fNormTree(0)
125{
126 fHelpVolume = NULL;
127 fBinaryTree = NULL;
128}
129
130////////////////////////////////////////////////////////////////////////////////
131/// construct MethodPDERS from a weight file
132
134 const TString& theWeightFile) :
136 fFcnCall(0),
137 fVRangeMode(kAdaptive),
138 fKernelEstimator(kBox),
139 fDelta(0),
140 fShift(0),
141 fScaleS(0),
142 fScaleB(0),
143 fDeltaFrac(0),
144 fGaussSigma(0),
145 fGaussSigmaNorm(0),
146 fNRegOut(0),
147 fNEventsMin(0),
148 fNEventsMax(0),
149 fMaxVIterations(0),
150 fInitialScale(0),
151 fInitializedVolumeEle(0),
152 fkNNMin(0),
153 fkNNMax(0),
154 fMax_distance(0),
155 fPrinted(0),
156 fNormTree(0)
157{
160}
161
162////////////////////////////////////////////////////////////////////////////////
163/// PDERS can handle classification with 2 classes and regression with one or more regression-targets
164
171
172////////////////////////////////////////////////////////////////////////////////
173/// default initialisation routine called by all constructors
174
176{
177 fBinaryTree = NULL;
178
179 UpdateThis();
180
181 // default options
182 fDeltaFrac = 3.0;
183 fVRangeMode = kAdaptive;
184 fKernelEstimator = kBox;
185
186 // special options for Adaptive mode
187 fNEventsMin = 100;
188 fNEventsMax = 200;
189 fMaxVIterations = 150;
190 fInitialScale = 0.99;
191 fGaussSigma = 0.1;
192 fNormTree = kFALSE;
193
194 fkNNMin = Int_t(fNEventsMin);
195 fkNNMax = Int_t(fNEventsMax);
196
197 fInitializedVolumeEle = kFALSE;
198 fAverageRMS.clear();
199
200 // the minimum requirement to declare an event signal-like
201 SetSignalReferenceCut( 0.0 );
202}
203
204////////////////////////////////////////////////////////////////////////////////
205/// destructor
206
208{
209 if (fDelta) delete fDelta;
210 if (fShift) delete fShift;
211
212 if (NULL != fBinaryTree) delete fBinaryTree;
213}
214
215////////////////////////////////////////////////////////////////////////////////
216/// define the options (their key words) that can be set in the option string.
217///
218/// know options:
219/// - VolumeRangeMode `<string>` Method to determine volume range
220/// available values are:
221/// - MinMax
222/// - Unscaled
223/// - RMS
224/// - kNN
225/// - Adaptive `<default>`
226///
227/// - KernelEstimator `<string>` Kernel estimation function
228/// available values are:
229/// - Box `<default>`
230/// - Sphere
231/// - Teepee
232/// - Gauss
233/// - Sinc3
234/// - Sinc5
235/// - Sinc7
236/// - Sinc9
237/// - Sinc11
238/// - Lanczos2
239/// - Lanczos3
240/// - Lanczos5
241/// - Lanczos8
242/// - Trim
243///
244/// - DeltaFrac `<float>` Ratio of #%EventsMin/#%EventsMax for MinMax and RMS volume range
245/// - NEventsMin `<int>` Minimum number of events for adaptive volume range
246/// - NEventsMax `<int>` Maximum number of events for adaptive volume range
247/// - MaxVIterations `<int>` Maximum number of iterations for adaptive volume range
248/// - InitialScale `<float>` Initial scale for adaptive volume range
249/// - GaussSigma `<float>` Width with respect to the volume size of Gaussian kernel estimator
250
252{
253 DeclareOptionRef(fVolumeRange="Adaptive", "VolumeRangeMode", "Method to determine volume size");
254 AddPreDefVal(TString("Unscaled"));
255 AddPreDefVal(TString("MinMax"));
256 AddPreDefVal(TString("RMS"));
257 AddPreDefVal(TString("Adaptive"));
258 AddPreDefVal(TString("kNN"));
259
260 DeclareOptionRef(fKernelString="Box", "KernelEstimator", "Kernel estimation function");
261 AddPreDefVal(TString("Box"));
262 AddPreDefVal(TString("Sphere"));
263 AddPreDefVal(TString("Teepee"));
264 AddPreDefVal(TString("Gauss"));
265 AddPreDefVal(TString("Sinc3"));
266 AddPreDefVal(TString("Sinc5"));
267 AddPreDefVal(TString("Sinc7"));
268 AddPreDefVal(TString("Sinc9"));
269 AddPreDefVal(TString("Sinc11"));
270 AddPreDefVal(TString("Lanczos2"));
271 AddPreDefVal(TString("Lanczos3"));
272 AddPreDefVal(TString("Lanczos5"));
273 AddPreDefVal(TString("Lanczos8"));
274 AddPreDefVal(TString("Trim"));
275
276 DeclareOptionRef(fDeltaFrac , "DeltaFrac", "nEventsMin/Max for minmax and rms volume range");
277 DeclareOptionRef(fNEventsMin , "NEventsMin", "nEventsMin for adaptive volume range");
278 DeclareOptionRef(fNEventsMax , "NEventsMax", "nEventsMax for adaptive volume range");
279 DeclareOptionRef(fMaxVIterations, "MaxVIterations", "MaxVIterations for adaptive volume range");
280 DeclareOptionRef(fInitialScale , "InitialScale", "InitialScale for adaptive volume range");
281 DeclareOptionRef(fGaussSigma , "GaussSigma", "Width (wrt volume size) of Gaussian kernel estimator");
282 DeclareOptionRef(fNormTree , "NormTree", "Normalize binary search tree");
283}
284
285////////////////////////////////////////////////////////////////////////////////
286/// process the options specified by the user
287
289{
290 if (IgnoreEventsWithNegWeightsInTraining()) {
291 Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
292 << GetMethodTypeName()
293 << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
294 << Endl;
295 }
296
297 fGaussSigmaNorm = fGaussSigma; // * TMath::Sqrt( Double_t(GetNvar()) );
298
299 fVRangeMode = MethodPDERS::kUnsupported;
300
301 if (fVolumeRange == "MinMax" ) fVRangeMode = kMinMax;
302 else if (fVolumeRange == "RMS" ) fVRangeMode = kRMS;
303 else if (fVolumeRange == "Adaptive" ) fVRangeMode = kAdaptive;
304 else if (fVolumeRange == "Unscaled" ) fVRangeMode = kUnscaled;
305 else if (fVolumeRange == "kNN" ) fVRangeMode = kkNN;
306 else {
307 Log() << kFATAL << "VolumeRangeMode parameter '" << fVolumeRange << "' unknown" << Endl;
308 }
309
310 if (fKernelString == "Box" ) fKernelEstimator = kBox;
311 else if (fKernelString == "Sphere" ) fKernelEstimator = kSphere;
312 else if (fKernelString == "Teepee" ) fKernelEstimator = kTeepee;
313 else if (fKernelString == "Gauss" ) fKernelEstimator = kGauss;
314 else if (fKernelString == "Sinc3" ) fKernelEstimator = kSinc3;
315 else if (fKernelString == "Sinc5" ) fKernelEstimator = kSinc5;
316 else if (fKernelString == "Sinc7" ) fKernelEstimator = kSinc7;
317 else if (fKernelString == "Sinc9" ) fKernelEstimator = kSinc9;
318 else if (fKernelString == "Sinc11" ) fKernelEstimator = kSinc11;
319 else if (fKernelString == "Lanczos2" ) fKernelEstimator = kLanczos2;
320 else if (fKernelString == "Lanczos3" ) fKernelEstimator = kLanczos3;
321 else if (fKernelString == "Lanczos5" ) fKernelEstimator = kLanczos5;
322 else if (fKernelString == "Lanczos8" ) fKernelEstimator = kLanczos8;
323 else if (fKernelString == "Trim" ) fKernelEstimator = kTrim;
324 else {
325 Log() << kFATAL << "KernelEstimator parameter '" << fKernelString << "' unknown" << Endl;
326 }
327
328 // TODO: Add parameter validation
329
330 Log() << kVERBOSE << "interpreted option string: vRangeMethod: '"
331 << (const char*)((fVRangeMode == kMinMax) ? "MinMax" :
332 (fVRangeMode == kUnscaled) ? "Unscaled" :
333 (fVRangeMode == kRMS ) ? "RMS" : "Adaptive") << "'" << Endl;
334 if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
335 Log() << kVERBOSE << "deltaFrac: " << fDeltaFrac << Endl;
336 else
337 Log() << kVERBOSE << "nEventsMin/Max, maxVIterations, initialScale: "
338 << fNEventsMin << " " << fNEventsMax
339 << " " << fMaxVIterations << " " << fInitialScale << Endl;
340 Log() << kVERBOSE << "KernelEstimator = " << fKernelString << Endl;
341}
342
343////////////////////////////////////////////////////////////////////////////////
344/// this is a dummy training: the preparation work to do is the construction
345/// of the binary tree as a pointer chain. It is easier to directly save the
346/// trainingTree in the weight file, and to rebuild the binary tree in the
347/// test phase from scratch
348
350{
351 if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with PDERS; "
352 << "please remove the option from the configuration string, or "
353 << "use \"!Normalise\""
354 << Endl;
355
356 CreateBinarySearchTree( Types::kTraining );
357
358 CalcAverages();
359 SetVolumeElement();
360
361 fInitializedVolumeEle = kTRUE;
362 ExitFromTraining();
363}
364
365////////////////////////////////////////////////////////////////////////////////
366/// init the size of a volume element using a defined fraction of the
367/// volume containing the entire events
368
370{
371 if (fInitializedVolumeEle == kFALSE) {
372 fInitializedVolumeEle = kTRUE;
373
374 // binary trees must exist
375 assert( fBinaryTree );
376
377 CalcAverages();
378 SetVolumeElement();
379 }
380
381 // cannot determine error
382 NoErrorCalc(err, errUpper);
383
384 return this->CRScalc( *GetEvent() );
385}
386
387////////////////////////////////////////////////////////////////////////////////
388
389const std::vector< Float_t >& TMVA::MethodPDERS::GetRegressionValues()
390{
391 if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>;
392 fRegressionReturnVal->clear();
393 // init the size of a volume element using a defined fraction of the
394 // volume containing the entire events
395 if (fInitializedVolumeEle == kFALSE) {
396 fInitializedVolumeEle = kTRUE;
397
398 // binary trees must exist
399 assert( fBinaryTree );
400
401 CalcAverages();
402
403 SetVolumeElement();
404 }
405
406 const Event* ev = GetEvent();
407 this->RRScalc( *ev, fRegressionReturnVal );
408
409 Event * evT = new Event(*ev);
410 UInt_t ivar = 0;
411 for (std::vector<Float_t>::iterator it = fRegressionReturnVal->begin(); it != fRegressionReturnVal->end(); ++it ) {
412 evT->SetTarget(ivar,(*it));
413 ivar++;
414 }
415
416 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
417 fRegressionReturnVal->clear();
418
419 for (ivar = 0; ivar<evT2->GetNTargets(); ivar++) {
420 fRegressionReturnVal->push_back(evT2->GetTarget(ivar));
421 }
422
423 delete evT;
424
425
426 return (*fRegressionReturnVal);
427}
428
429////////////////////////////////////////////////////////////////////////////////
430/// compute also average RMS values required for adaptive Gaussian
431
433{
434 if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN ) {
435 fAverageRMS.clear();
436 fBinaryTree->CalcStatistics();
437
438 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
439 if (!DoRegression()){ //why there are separate rms for signal and background?
440 Float_t rmsS = fBinaryTree->RMS(Types::kSignal, ivar);
441 Float_t rmsB = fBinaryTree->RMS(Types::kBackground, ivar);
442 fAverageRMS.push_back( (rmsS + rmsB)*0.5 );
443 } else {
444 Float_t rms = fBinaryTree->RMS( ivar );
445 fAverageRMS.push_back( rms );
446 }
447 }
448 }
449}
450
451////////////////////////////////////////////////////////////////////////////////
452/// create binary search trees for signal and background
453
455{
456 if (NULL != fBinaryTree) delete fBinaryTree;
457 fBinaryTree = new BinarySearchTree();
458 if (fNormTree) {
459 fBinaryTree->SetNormalize( kTRUE );
460 }
461
462 fBinaryTree->Fill( GetEventCollection(type) );
463
464 if (fNormTree) {
465 fBinaryTree->NormalizeTree();
466 }
467
468 if (!DoRegression()) {
469 // these are the signal and background scales for the weights
470 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
471 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
472
473 Log() << kVERBOSE << "Signal and background scales: " << fScaleS << " " << fScaleB << Endl;
474 }
475}
476
477////////////////////////////////////////////////////////////////////////////////
478/// defines volume dimensions
479
481 if (GetNvar()==0) {
482 Log() << kFATAL << "GetNvar() == 0" << Endl;
483 return;
484 }
485
486 // init relative scales
487 fkNNMin = Int_t(fNEventsMin);
488 fkNNMax = Int_t(fNEventsMax);
489
490 if (fDelta) delete fDelta;
491 if (fShift) delete fShift;
492 fDelta = new std::vector<Float_t>( GetNvar() );
493 fShift = new std::vector<Float_t>( GetNvar() );
494
495 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
496 switch (fVRangeMode) {
497
498 case kRMS:
499 case kkNN:
500 case kAdaptive:
501 // sanity check
502 if (fAverageRMS.size() != GetNvar())
503 Log() << kFATAL << "<SetVolumeElement> RMS not computed: " << fAverageRMS.size() << Endl;
504 (*fDelta)[ivar] = fAverageRMS[ivar]*fDeltaFrac;
505 Log() << kVERBOSE << "delta of var[" << (*fInputVars)[ivar]
506 << "\t]: " << fAverageRMS[ivar]
507 << "\t | comp with |max - min|: " << (GetXmax( ivar ) - GetXmin( ivar ))
508 << Endl;
509 break;
510 case kMinMax:
511 (*fDelta)[ivar] = (GetXmax( ivar ) - GetXmin( ivar ))*fDeltaFrac;
512 break;
513 case kUnscaled:
514 (*fDelta)[ivar] = fDeltaFrac;
515 break;
516 default:
517 Log() << kFATAL << "<SetVolumeElement> unknown range-set mode: "
518 << fVRangeMode << Endl;
519 }
520 (*fShift)[ivar] = 0.5; // volume is centered around test value
521 }
522
523}
524
525////////////////////////////////////////////////////////////////////////////////
526/// Interface to RootFinder
527
529{
530 return ThisPDERS()->GetVolumeContentForRoot( scale );
531}
532
533////////////////////////////////////////////////////////////////////////////////
534/// count number of events in rescaled volume
535
537{
538 Volume v( *fHelpVolume );
539 v.ScaleInterval( scale );
540
541 Double_t count = GetBinaryTree()->SearchVolume( &v );
542
543 v.Delete();
544 return count;
545}
546
548 std::vector<const BinarySearchTreeNode*>& events,
549 Volume *volume )
550{
551 Float_t count = 0;
552
553 // -------------------------------------------------------------------------
554 //
555 // ==== test of volume search =====
556 //
557 // #define TMVA::MethodPDERS__countByHand__Debug__
558
559#ifdef TMVA_MethodPDERS__countByHand__Debug__
560
561 // starting values
562 count = fBinaryTree->SearchVolume( volume );
563
564 Int_t iS = 0, iB = 0;
565 UInt_t nvar = GetNvar();
566 for (UInt_t ievt_=0; ievt_<Data()->GetNTrainingEvents(); ievt_++) {
567 const Event * ev = GetTrainingEvent(ievt_);
568 Bool_t inV;
569 for (Int_t ivar=0; ivar<nvar; ivar++) {
570 Float_t x = ev->GetValue(ivar);
571 inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
572 if (!inV) break;
573 }
574 if (inV) {
575 in++;
576 }
577 }
578 Log() << kVERBOSE << "debug: my test: " << in << Endl;// <- ***********tree
579 Log() << kVERBOSE << "debug: binTree: " << count << Endl << Endl;// <- ***********tree
580
581#endif
582
583 // -------------------------------------------------------------------------
584
585 if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled) { // Constant volume
586
587 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
588 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
589 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
590 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
591 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
592 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
593 }
594 Volume* svolume = new Volume( lb, ub );
595 // starting values
596
597 fBinaryTree->SearchVolume( svolume, &events );
598 }
599 else if (fVRangeMode == kAdaptive) { // adaptive volume
600
601 // -----------------------------------------------------------------------
602
603 // TODO: optimize, perhaps multi stage with broadening limits,
604 // or a different root finding method entirely,
605 if (MethodPDERS_UseFindRoot) {
606
607 // that won't need to search through large volume, where the bottle neck probably is
608
609 fHelpVolume = volume;
610
611 UpdateThis(); // necessary update of static pointer
612 RootFinder rootFinder( this, 0.01, 50, 200, 10 );
613 Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
614
615 volume->ScaleInterval( scale );
616
617 fBinaryTree->SearchVolume( volume, &events );
618
619 fHelpVolume = NULL;
620 }
621 // -----------------------------------------------------------------------
622 else {
623
624 // starting values
625 count = fBinaryTree->SearchVolume( volume );
626
627 Float_t nEventsO = count;
628 Int_t i_=0;
629
630 while (nEventsO < fNEventsMin) { // this isn't a sain start... try again
631 volume->ScaleInterval( 1.15 );
632 count = fBinaryTree->SearchVolume( volume );
633 nEventsO = count;
634 i_++;
635 }
636 if (i_ > 50) Log() << kWARNING << "warning in event: " << e
637 << ": adaptive volume pre-adjustment reached "
638 << ">50 iterations in while loop (" << i_ << ")" << Endl;
639
641 Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax);
642 Float_t scaleO = 1.0;
643 Float_t scaleN = fInitialScale;
647
648 for (Int_t ic=1; ic<fMaxVIterations; ic++) {
649 if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
650
651 // search for events in rescaled volume
652 Volume* v = new Volume( *volume );
653 v->ScaleInterval( scale );
654 nEventsN = fBinaryTree->SearchVolume( v );
655
656 // determine next iteration (linear approximation)
657 if (nEventsN > 1 && nEventsN - nEventsO != 0)
658 if (scaleN - scaleO != 0)
660 else
661 scale += (-0.01); // should not actually occur...
662 else
663 scale += 0.5; // use much larger volume
664
665 // save old scale
666 scaleN = scale;
667
668 // take if better (don't accept it if too small number of events)
670 (nEventsN >= fNEventsMin || nEventsBest < nEventsN)) {
673 }
674
675 v->Delete();
676 delete v;
677 }
678 else break;
679 }
680
681 // last sanity check
683 // include "1" to cover float precision
685 Log() << kWARNING << "warning in event " << e
686 << ": adaptive volume adjustment reached "
687 << "max. #iterations (" << fMaxVIterations << ")"
688 << "[ nEvents: " << nEventsN << " " << fNEventsMin << " " << fNEventsMax << "]"
689 << Endl;
690
691 volume->ScaleInterval( scaleBest );
692 fBinaryTree->SearchVolume( volume, &events );
693 }
694
695 // end of adaptive method
696
697 } else if (fVRangeMode == kkNN) {
698 Volume v(*volume);
699
700 events.clear();
701 // check number of signals in beginning volume
702 Int_t kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );
703 //if this number is too large return fkNNMax+1
704
705 Int_t t_times = 0; // number of iterations
706
707 while ( !(kNNcount >= fkNNMin && kNNcount <= fkNNMax) ) {
708 if (kNNcount < fkNNMin) { //if we have too less points
709 Float_t scale = 2; //better scale needed
710 volume->ScaleInterval( scale );
711 }
712 else if (kNNcount > fkNNMax) { //if we have too many points
713 Float_t scale = 0.1; //better scale needed
714 volume->ScaleInterval( scale );
715 }
716 events.clear();
717
718 v = *volume ;
719
720 kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 ); //new search
721
722 t_times++;
723
724 if (t_times == fMaxVIterations) {
725 Log() << kWARNING << "warning in event" << e
726 << ": kNN volume adjustment reached "
727 << "max. #iterations (" << fMaxVIterations << ")"
728 << "[ kNN: " << fkNNMin << " " << fkNNMax << Endl;
729 break;
730 }
731 }
732
733 //vector to normalize distance in each dimension
734 Double_t *dim_normalization = new Double_t[GetNvar()];
735 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
736 dim_normalization [ivar] = 1.0 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
737 }
738
739 std::vector<const BinarySearchTreeNode*> tempVector; // temporary vector for events
740
741 if (kNNcount >= fkNNMin) {
742 std::vector<Double_t> *distances = new std::vector<Double_t>( kNNcount );
743
744 //counting the distance for each event
745 for (Int_t j=0;j< Int_t(events.size()) ;j++)
746 (*distances)[j] = GetNormalizedDistance ( e, *events[j], dim_normalization );
747
748 //counting the fkNNMin-th element
749 std::vector<Double_t>::iterator wsk = distances->begin();
750 for (Int_t j=0;j<fkNNMin-1;++j) ++wsk;
751 std::nth_element( distances->begin(), wsk, distances->end() );
752
753 //getting all elements that are closer than fkNNMin-th element
754 //signals
755 for (Int_t j=0;j<Int_t(events.size());j++) {
756 Double_t dist = GetNormalizedDistance( e, *events[j], dim_normalization );
757
758 if (dist <= (*distances)[fkNNMin-1])
759 tempVector.push_back( events[j] );
760 }
761 fMax_distance = (*distances)[fkNNMin-1];
762 delete distances;
763 }
764 delete[] dim_normalization;
765 events = tempVector;
766
767 } else {
768
769 // troubles ahead...
770 Log() << kFATAL << "<GetSample> unknown RangeMode: " << fVRangeMode << Endl;
771 }
772 // -----------------------------------------------------------------------
773}
774
775////////////////////////////////////////////////////////////////////////////////
776
778{
779 std::vector<const BinarySearchTreeNode*> events;
780
781 // computes event weight by counting number of signal and background
782 // events (of reference sample) that are found within given volume
783 // defined by the event
784 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
785 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
786
787 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
788 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
789 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
790 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
791 }
792
793 Volume *volume = new Volume( lb, ub );
794
795 GetSample( e, events, volume );
796 Double_t count = CKernelEstimate( e, events, *volume );
797 delete volume;
798 delete lb;
799 delete ub;
800
801 return count;
802}
803
804////////////////////////////////////////////////////////////////////////////////
805
806void TMVA::MethodPDERS::RRScalc( const Event& e, std::vector<Float_t>* count )
807{
808 std::vector<const BinarySearchTreeNode*> events;
809
810 // computes event weight by counting number of signal and background
811 // events (of reference sample) that are found within given volume
812 // defined by the event
813 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
814 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
815
816 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
817 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
818 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
819 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
820 }
821 Volume *volume = new Volume( lb, ub );
822
823 GetSample( e, events, volume );
824 RKernelEstimate( e, events, *volume, count );
825
826 delete volume;
827
828 return;
829}
830////////////////////////////////////////////////////////////////////////////////
831/// normalization factors so we can work with radius 1 hyperspheres
832
834 std::vector<const BinarySearchTreeNode*>& events, Volume& v )
835{
836 Double_t *dim_normalization = new Double_t[GetNvar()];
837 for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
838 dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
839
840 Double_t pdfSumS = 0;
841 Double_t pdfSumB = 0;
842
843 // Iteration over sample points
844 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
845
846 // First switch to the one dimensional distance
847 Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
848
849 // always working within the hyperelipsoid, except for when we don't
850 // note that rejection ratio goes to 1 as nvar goes to infinity
851 if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
852
853 if ( (*iev)->GetClass()==fSignalClass )
854 pdfSumS += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
855 else
856 pdfSumB += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
857 }
858 pdfSumS = KernelNormalization( pdfSumS < 0. ? 0. : pdfSumS );
859 pdfSumB = KernelNormalization( pdfSumB < 0. ? 0. : pdfSumB );
860
861 delete[] dim_normalization;
862
863 if (pdfSumS < 1e-20 && pdfSumB < 1e-20) return 0.5;
864 if (pdfSumB < 1e-20) return 1.0;
865 if (pdfSumS < 1e-20) return 0.0;
866
867 Float_t r = pdfSumB*fScaleB/(pdfSumS*fScaleS);
868 return 1.0/(r + 1.0); // TODO: propagate errors from here
869}
870
871////////////////////////////////////////////////////////////////////////////////
872/// normalization factors so we can work with radius 1 hyperspheres
873
875 std::vector<const BinarySearchTreeNode*>& events, Volume& v,
876 std::vector<Float_t>* pdfSum )
877{
878 Double_t *dim_normalization = new Double_t[GetNvar()];
879 for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
880 dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
881
882 // std::vector<Float_t> pdfSum;
883 pdfSum->clear();
884 Float_t pdfDiv = 0;
885 fNRegOut = 1; // for now, regression is just for 1 dimension
886
887 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
888 pdfSum->push_back( 0 );
889
890 // Iteration over sample points
891 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
892
893 // First switch to the one dimensional distance
894 Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
895
896 // always working within the hyperelipsoid, except for when we don't
897 // note that rejection ratio goes to 1 as nvar goes to infinity
898 if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
899
900 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++) {
901 pdfSum->at(ivar) += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight() * (*iev)->GetTargets()[ivar];
902 pdfDiv += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
903 }
904 }
905
906 delete[] dim_normalization;
907
908 if (pdfDiv == 0)
909 return;
910
911 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
912 pdfSum->at(ivar) /= pdfDiv;
913
914 return;
915}
916
917////////////////////////////////////////////////////////////////////////////////
918/// from the normalized euclidean distance calculate the distance
919/// for a certain kernel
920
922{
923 switch (fKernelEstimator) {
924 case kBox:
925 case kSphere:
926 return 1;
927 break;
928 case kTeepee:
929 return (1 - normalized_distance);
930 break;
931 case kGauss:
932 return TMath::Gaus( normalized_distance, 0, fGaussSigmaNorm, kFALSE);
933 break;
934 case kSinc3:
935 case kSinc5:
936 case kSinc7:
937 case kSinc9:
938 case kSinc11: {
939 Double_t side_crossings = 2 + ((int) fKernelEstimator) - ((int) kSinc3);
940 return NormSinc (side_crossings * normalized_distance);
941 }
942 break;
943 case kLanczos2:
944 return LanczosFilter (2, normalized_distance);
945 break;
946 case kLanczos3:
947 return LanczosFilter (3, normalized_distance);
948 break;
949 case kLanczos5:
950 return LanczosFilter (5, normalized_distance);
951 break;
952 case kLanczos8:
953 return LanczosFilter (8, normalized_distance);
954 break;
955 case kTrim: {
956 Double_t x = normalized_distance / fMax_distance;
957 x = 1 - x*x*x;
958 return x*x*x;
959 }
960 break;
961 default:
962 Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
963 break;
964 }
965
966 return 0;
967}
968
969////////////////////////////////////////////////////////////////////////////////
970/// Calculating the normalization factor only once (might need a reset at some point.
971/// Can the method be restarted with different params?)
972
974{
975 // Caching jammed to disable function.
976 // It's not really useful after all, badly implemented and untested :-)
977 TTHREAD_TLS(Double_t) ret = 1.0;
978
979 if (ret != 0.0) return ret*pdf;
980
981 // We first normalize by the volume of the hypersphere.
982 switch (fKernelEstimator) {
983 case kBox:
984 case kSphere:
985 ret = 1.;
986 break;
987 case kTeepee:
988 ret = (GetNvar() * (GetNvar() + 1) * TMath::Gamma (((Double_t) GetNvar()) / 2.)) /
989 ( TMath::Power (2., (Double_t) GetNvar() + 1) * TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.));
990 break;
991 case kGauss:
992 // We use full range integral here. Reasonable because of the fast function decay.
993 ret = 1. / TMath::Power ( 2 * TMath::Pi() * fGaussSigmaNorm * fGaussSigmaNorm, ((Double_t) GetNvar()) / 2.);
994 break;
995 case kSinc3:
996 case kSinc5:
997 case kSinc7:
998 case kSinc9:
999 case kSinc11:
1000 case kLanczos2:
1001 case kLanczos3:
1002 case kLanczos5:
1003 case kLanczos8:
1004 // We use the full range integral here. Reasonable because the central lobe dominates it.
1005 ret = 1 / TMath::Power ( 2., (Double_t) GetNvar() );
1006 break;
1007 default:
1008 Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
1009 }
1010
1011 // Normalizing by the full volume
1012 ret *= ( TMath::Power (2., static_cast<Int_t>(GetNvar())) * TMath::Gamma (1 + (((Double_t) GetNvar()) / 2.)) ) /
1013 TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.);
1014
1015 return ret*pdf;
1016}
1017
1018////////////////////////////////////////////////////////////////////////////////
1019/// We use Euclidian metric here. Might not be best or most efficient.
1020
1024{
1025 Double_t ret=0;
1026 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1027 Double_t dist = dim_normalization[ivar] * (sample_event.GetEventV()[ivar] - base_event.GetValue(ivar));
1028 ret += dist*dist;
1029 }
1030 // DD 26.11.2008: bugfix: division by (GetNvar()) was missing for correct normalisation
1031 ret /= GetNvar();
1032 return TMath::Sqrt (ret);
1033}
1034
1035////////////////////////////////////////////////////////////////////////////////
1036/// NormSinc
1037
1039{
1040 if (x < 10e-10 && x > -10e-10) {
1041 return 1; // Poor man's l'Hopital
1042 }
1043
1044 Double_t pix = TMath::Pi() * x;
1046 Double_t ret;
1047
1048 if (GetNvar() % 2)
1049 ret = TMath::Power (sinc, static_cast<Int_t>(GetNvar()));
1050 else
1051 ret = TMath::Abs (sinc) * TMath::Power (sinc, static_cast<Int_t>(GetNvar() - 1));
1052
1053 return ret;
1054}
1055
1056////////////////////////////////////////////////////////////////////////////////
1057/// Lanczos Filter
1058
1060{
1061 if (x < 10e-10 && x > -10e-10) {
1062 return 1; // Poor man's l'Hopital
1063 }
1064
1065 Double_t pix = TMath::Pi() * x;
1066 Double_t pixtimesn = pix * ((Double_t) level);
1068 Double_t ret;
1069
1070 if (GetNvar() % 2) ret = TMath::Power (lanczos, static_cast<Int_t>(GetNvar()));
1071 else ret = TMath::Abs (lanczos) * TMath::Power (lanczos, static_cast<Int_t>(GetNvar() - 1));
1072
1073 return ret;
1074}
1075
1076////////////////////////////////////////////////////////////////////////////////
1077/// statistical error estimate for RS estimator
1078
1080 Float_t sumW2S, Float_t sumW2B ) const
1081{
1082 Float_t c = fScaleB/fScaleS;
1083 Float_t d = countS + c*countB; d *= d;
1084
1085 if (d < 1e-10) return 1; // Error is zero because of B = S = 0
1086
1087 Float_t f = c*c/d/d;
1089
1090 if (err < 1e-10) return 1; // Error is zero because of B or S = 0
1091
1092 return sqrt(err);
1093}
1094
1095////////////////////////////////////////////////////////////////////////////////
1096/// write weights to xml file
1097
1098void TMVA::MethodPDERS::AddWeightsXMLTo( void* parent ) const
1099{
1100 void* wght = gTools().AddChild(parent, "Weights");
1101 if (fBinaryTree)
1102 fBinaryTree->AddXMLTo(wght);
1103 else
1104 Log() << kFATAL << "Signal and background binary search tree not available" << Endl;
1105 //Log() << kFATAL << "Please implement writing of weights as XML" << Endl;
1106}
1107
1108////////////////////////////////////////////////////////////////////////////////
1109
1111{
1112 if (NULL != fBinaryTree) delete fBinaryTree;
1113 void* treenode = gTools().GetChild(wghtnode);
1115 if(!fBinaryTree)
1116 Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1117 if(!fBinaryTree)
1118 Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1119 fBinaryTree->SetPeriode( GetNvar() );
1120 fBinaryTree->CalcStatistics();
1121 fBinaryTree->CountNodes();
1122 if (fBinaryTree->GetSumOfWeights( Types::kSignal ) > 0)
1123 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1124 else fScaleS = 1;
1125 if (fBinaryTree->GetSumOfWeights( Types::kBackground ) > 0)
1126 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1127 else fScaleB = 1;
1128 Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1129 CalcAverages();
1130 SetVolumeElement();
1131 fInitializedVolumeEle = kTRUE;
1132}
1133
1134////////////////////////////////////////////////////////////////////////////////
1135/// read weight info from file
1136
1138{
1139 if (NULL != fBinaryTree) delete fBinaryTree;
1140
1141 fBinaryTree = new BinarySearchTree();
1142
1143 istr >> *fBinaryTree;
1144
1145 fBinaryTree->SetPeriode( GetNvar() );
1146
1147 fBinaryTree->CalcStatistics();
1148
1149 fBinaryTree->CountNodes();
1150
1151 // these are the signal and background scales for the weights
1152 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1153 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1154
1155 Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1156
1157 CalcAverages();
1158
1159 SetVolumeElement();
1160
1161 fInitializedVolumeEle = kTRUE;
1162}
1163
1164////////////////////////////////////////////////////////////////////////////////
1165/// write training sample (TTree) to file
1166
1170
1171////////////////////////////////////////////////////////////////////////////////
1172/// read training sample from file
1173
1177
1178////////////////////////////////////////////////////////////////////////////////
1179/// static pointer to this object
1180
1182{
1183 return GetMethodPDERSThreadLocal();
1184}
1185////////////////////////////////////////////////////////////////////////////////
1186/// update static this pointer
1187
1189{
1190 GetMethodPDERSThreadLocal() = this;
1191}
1192
1193////////////////////////////////////////////////////////////////////////////////
1194/// write specific classifier response
1195
1196void TMVA::MethodPDERS::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1197{
1198 fout << " // not implemented for class: \"" << className << "\"" << std::endl;
1199 fout << "};" << std::endl;
1200}
1201
1202////////////////////////////////////////////////////////////////////////////////
1203/// get help message text
1204///
1205/// typical length of text line:
1206/// "|--------------------------------------------------------------|"
1207
1209{
1210 Log() << Endl;
1211 Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1212 Log() << Endl;
1213 Log() << "PDERS is a generalization of the projective likelihood classifier " << Endl;
1214 Log() << "to N dimensions, where N is the number of input variables used." << Endl;
1215 Log() << "In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" << Endl;
1216 Log() << "(k-NN) methods. If the multidimensional PDF for signal and background" << Endl;
1217 Log() << "were known, this classifier would exploit the full information" << Endl;
1218 Log() << "contained in the input variables, and would hence be optimal. In " << Endl;
1219 Log() << "practice however, huge training samples are necessary to sufficiently " << Endl;
1220 Log() << "populate the multidimensional phase space. " << Endl;
1221 Log() << Endl;
1222 Log() << "The simplest implementation of PDERS counts the number of signal" << Endl;
1223 Log() << "and background events in the vicinity of a test event, and returns" << Endl;
1224 Log() << "a weight according to the majority species of the neighboring events." << Endl;
1225 Log() << "A more involved version of PDERS (selected by the option \"KernelEstimator\")" << Endl;
1226 Log() << "uses Kernel estimation methods to approximate the shape of the PDF." << Endl;
1227 Log() << Endl;
1228 Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
1229 Log() << Endl;
1230 Log() << "PDERS can be very powerful in case of strongly non-linear problems, " << Endl;
1231 Log() << "e.g., distinct islands of signal and background regions. Because of " << Endl;
1232 Log() << "the exponential growth of the phase space, it is important to restrict" << Endl;
1233 Log() << "the number of input variables (dimension) to the strictly necessary." << Endl;
1234 Log() << Endl;
1235 Log() << "Note that PDERS is a slowly responding classifier. Moreover, the necessity" << Endl;
1236 Log() << "to store the entire binary tree in memory, to avoid accessing virtual " << Endl;
1237 Log() << "memory, limits the number of training events that can effectively be " << Endl;
1238 Log() << "used to model the multidimensional PDF." << Endl;
1239 Log() << Endl;
1240 Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1241 Log() << Endl;
1242 Log() << "If the PDERS response is found too slow when using the adaptive volume " << Endl;
1243 Log() << "size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" << Endl;
1244 Log() << "to reduce the number of events required in the volume, and/or to enlarge" << Endl;
1245 Log() << "the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" << Endl;
1246 Log() << "to the width (\"GaussSigma\") of the Gaussian kernel (if used)." << Endl;
1247}
@ kBox
Definition Buttons.h:29
#define REGISTER_METHOD(CLASS)
for example
#define d(i)
Definition RSha256.hxx:102
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define e(i)
Definition RSha256.hxx:103
bool Bool_t
Boolean (0=false, 1=true) (bool)
Definition RtypesCore.h:77
int Int_t
Signed integer 4 bytes (int)
Definition RtypesCore.h:59
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void pix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
const_iterator begin() const
const_iterator end() const
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:131
Node for the BinarySearch or Decision Trees.
A simple Binary search tree including a volume search method.
static BinarySearchTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=262657)
re-create a new tree (decision tree or search tree) from XML
Class that contains all the data information.
Definition DataSetInfo.h:62
Virtual base Class for all MVA method.
Definition MethodBase.h:111
This is a generalization of the above Likelihood methods to dimensions, where is the number of inpu...
Definition MethodPDERS.h:61
void ReadWeightsFromXML(void *wghtnode) override
void DeclareOptions() override
define the options (their key words) that can be set in the option string.
void GetHelpMessage() const override
get help message text
void WriteWeightsToStream(TFile &rf) const
write training sample (TTree) to file
void CreateBinarySearchTree(Types::ETreeType type)
create binary search trees for signal and background
BinarySearchTree * fBinaryTree
binary tree
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
virtual ~MethodPDERS(void)
destructor
MethodPDERS(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor for the PDERS method
void GetSample(const Event &e, std::vector< const BinarySearchTreeNode * > &events, Volume *volume)
void ReadWeightsFromStream(std::istream &istr) override
read weight info from file
void ProcessOptions() override
process the options specified by the user
void AddWeightsXMLTo(void *parent) const override
write weights to xml file
Float_t GetError(Float_t countS, Float_t countB, Float_t sumW2S, Float_t sumW2B) const
statistical error estimate for RS estimator
static MethodPDERS * ThisPDERS(void)
static pointer to this object
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
init the size of a volume element using a defined fraction of the volume containing the entire events
void Train(void) override
this is a dummy training: the preparation work to do is the construction of the binary tree as a poin...
Double_t KernelNormalization(Double_t pdf)
Calculating the normalization factor only once (might need a reset at some point.
void RRScalc(const Event &, std::vector< Float_t > *count)
void UpdateThis()
update static this pointer
Double_t CRScalc(const Event &)
void Init(void) override
default initialisation routine called by all constructors
void CalcAverages()
compute also average RMS values required for adaptive Gaussian
void RKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &, std::vector< Float_t > *pdfSum)
normalization factors so we can work with radius 1 hyperspheres
Double_t NormSinc(Double_t x)
NormSinc.
void SetVolumeElement(void)
defines volume dimensions
const std::vector< Float_t > & GetRegressionValues() override
Double_t LanczosFilter(Int_t level, Double_t x)
Lanczos Filter.
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
PDERS can handle classification with 2 classes and regression with one or more regression-targets.
Double_t CKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &)
normalization factors so we can work with radius 1 hyperspheres
Double_t ApplyKernelFunction(Double_t normalized_distance)
from the normalized euclidean distance calculate the distance for a certain kernel
Double_t GetNormalizedDistance(const TMVA::Event &base_event, const BinarySearchTreeNode &sample_event, Double_t *dim_normalization)
We use Euclidian metric here. Might not be best or most efficient.
static Double_t IGetVolumeContentForRoot(Double_t)
Interface to RootFinder.
Double_t GetVolumeContentForRoot(Double_t)
count number of events in rescaled volume
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition RootFinder.h:48
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:828
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1150
void * AddChild(void *parent, const char *childname, const char *content=nullptr, bool isRootNode=false)
add child node
Definition Tools.cxx:1124
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Definition Types.h:135
@ kBackground
Definition Types.h:136
@ kClassification
Definition Types.h:127
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
Volume for BinarySearchTree.
Definition Volume.h:47
void ScaleInterval(Double_t f)
"scale" the volume by symmetrically blowing up the interval in each dimension
Definition Volume.cxx:180
const Bool_t MethodPDERS_UseFindRoot
Basic string class.
Definition TString.h:138
Double_t x[n]
Definition legend1.C:17
create variable transformations
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculates a gaussian function with mean and sigma.
Definition TMath.cxx:471
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition TMath.h:673
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Returns x raised to the power y.
Definition TMath.h:732
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Definition TMath.cxx:353
constexpr Double_t Pi()
Definition TMath.h:40
Double_t Sin(Double_t)
Returns the sine of an angle of x radians.
Definition TMath.h:599
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:124