Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodPDERS.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Yair Mahalalel, Joerg Stelzer, Helge Voss, Kai Voss
3
4/***********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodPDERS *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation *
12 * *
13 * Authors (alphabetical): *
14 * Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Poland *
15 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16 * Kamil Kraszewski <kalq@cern.ch> - IFJ PAN & UJ, Poland *
17 * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18 * Yair Mahalalel <Yair.Mahalalel@cern.ch> - CERN, Switzerland *
19 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21 * *
22 * Copyright (c) 2005: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * *
27 * Redistribution and use in source and binary forms, with or without *
28 * modification, are permitted according to the terms listed in LICENSE *
29 * (http://tmva.sourceforge.net/LICENSE) *
30 ***********************************************************************************/
31
32/*! \class TMVA::MethodPDERS
33\ingroup TMVA
34
35This is a generalization of the above Likelihood methods to \f$ N_{var} \f$
36dimensions, where \f$ N_{var} \f$ is the number of input variables
37used in the MVA. If the multi-dimensional probability density functions
38(PDFs) for signal and background were known, this method contains the entire
39physical information, and is therefore optimal. Usually, kernel estimation
40methods are used to approximate the PDFs using the events from the
41training sample.
42
43A very simple probability density estimator (PDE) has been suggested
44in [hep-ex/0211019](http://arxiv.org/abs/hep-ex/0211019). The
45PDE for a given test event is obtained from counting the (normalized)
46number of signal and background (training) events that occur in the
47"vicinity" of the test event. The volume that describes "vicinity" is
48user-defined. A [search method based on binary-trees](http://arxiv.org/abs/hep-ex/0211019)
49is used to effectively reduce the
50selection time for the range search. Three different volume definitions
51are optional:
52
53 - *MinMax:* the volume is defined in each dimension with respect
54 to the full variable range found in the training sample.
55 - *RMS:* the volume is defined in each dimensions with respect
56 to the RMS estimated from the training sample.
57 - *Adaptive:* a volume element is defined in each dimensions with
58 respect to the RMS estimated from the training sample. The overall
59 scale of the volume element is then determined for each event so
60 that the total number of events confined in the volume be within
61 a user-defined range.
62
63The adaptive range search is used by default.
64*/
65
66#include "TMVA/MethodPDERS.h"
67
68#include "TMVA/BinaryTree.h"
70#include "TMVA/Configurable.h"
72#include "TMVA/Event.h"
73#include "TMVA/IMethod.h"
74#include "TMVA/MethodBase.h"
75#include "TMVA/MsgLogger.h"
76#include "TMVA/RootFinder.h"
77#include "TMVA/Tools.h"
79#include "TMVA/Types.h"
80
81#include "ThreadLocalStorage.h"
82#include "TFile.h"
83#include "TMath.h"
84
85#include <cassert>
86#include <algorithm>
87
88namespace TMVA {
90};
91
92
93REGISTER_METHOD(PDERS)
94
96
97////////////////////////////////////////////////////////////////////////////////
98/// standard constructor for the PDERS method
99
101 const TString& methodTitle,
102 DataSetInfo& theData,
103 const TString& theOption) :
104 MethodBase( jobName, Types::kPDERS, methodTitle, theData, theOption),
105 fFcnCall(0),
106 fVRangeMode(kAdaptive),
107 fKernelEstimator(kBox),
108 fDelta(0),
109 fShift(0),
110 fScaleS(0),
111 fScaleB(0),
112 fDeltaFrac(0),
113 fGaussSigma(0),
114 fGaussSigmaNorm(0),
115 fNRegOut(0),
116 fNEventsMin(0),
117 fNEventsMax(0),
118 fMaxVIterations(0),
119 fInitialScale(0),
120 fInitializedVolumeEle(0),
121 fkNNMin(0),
122 fkNNMax(0),
123 fMax_distance(0),
124 fPrinted(0),
125 fNormTree(0)
126{
127 fHelpVolume = NULL;
128 fBinaryTree = NULL;
129}
130
131////////////////////////////////////////////////////////////////////////////////
132/// construct MethodPDERS through from file
133
135 const TString& theWeightFile) :
136 MethodBase( Types::kPDERS, theData, theWeightFile),
137 fFcnCall(0),
138 fVRangeMode(kAdaptive),
139 fKernelEstimator(kBox),
140 fDelta(0),
141 fShift(0),
142 fScaleS(0),
143 fScaleB(0),
144 fDeltaFrac(0),
145 fGaussSigma(0),
146 fGaussSigmaNorm(0),
147 fNRegOut(0),
148 fNEventsMin(0),
149 fNEventsMax(0),
150 fMaxVIterations(0),
151 fInitialScale(0),
152 fInitializedVolumeEle(0),
153 fkNNMin(0),
154 fkNNMax(0),
155 fMax_distance(0),
156 fPrinted(0),
157 fNormTree(0)
158{
159 fHelpVolume = NULL;
160 fBinaryTree = NULL;
161}
162
163////////////////////////////////////////////////////////////////////////////////
164/// PDERS can handle classification with 2 classes and regression with one or more regression-targets
165
167{
168 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
169 if (type == Types::kRegression) return kTRUE;
170 return kFALSE;
171}
172
173////////////////////////////////////////////////////////////////////////////////
174/// default initialisation routine called by all constructors
175
177{
178 fBinaryTree = NULL;
179
180 UpdateThis();
181
182 // default options
183 fDeltaFrac = 3.0;
184 fVRangeMode = kAdaptive;
185 fKernelEstimator = kBox;
186
187 // special options for Adaptive mode
188 fNEventsMin = 100;
189 fNEventsMax = 200;
190 fMaxVIterations = 150;
191 fInitialScale = 0.99;
192 fGaussSigma = 0.1;
193 fNormTree = kFALSE;
194
195 fkNNMin = Int_t(fNEventsMin);
196 fkNNMax = Int_t(fNEventsMax);
197
198 fInitializedVolumeEle = kFALSE;
199 fAverageRMS.clear();
200
201 // the minimum requirement to declare an event signal-like
202 SetSignalReferenceCut( 0.0 );
203}
204
205////////////////////////////////////////////////////////////////////////////////
206/// destructor
207
209{
210 if (fDelta) delete fDelta;
211 if (fShift) delete fShift;
212
213 if (NULL != fBinaryTree) delete fBinaryTree;
214}
215
216////////////////////////////////////////////////////////////////////////////////
217/// define the options (their key words) that can be set in the option string.
218///
219/// know options:
220/// - VolumeRangeMode <string> Method to determine volume range
221/// available values are:
222/// - MinMax
223/// - Unscaled
224/// - RMS
225/// - kNN
226/// - Adaptive <default>
227///
228/// - KernelEstimator <string> Kernel estimation function
229/// available values are:
230/// - Box <default>
231/// - Sphere
232/// - Teepee
233/// - Gauss
234/// - Sinc3
235/// - Sinc5
236/// - Sinc7
237/// - Sinc9
238/// - Sinc11
239/// - Lanczos2
240/// - Lanczos3
241/// - Lanczos5
242/// - Lanczos8
243/// - Trim
244///
245/// - DeltaFrac <float> Ratio of #EventsMin/#EventsMax for MinMax and RMS volume range
246/// - NEventsMin <int> Minimum number of events for adaptive volume range
247/// - NEventsMax <int> Maximum number of events for adaptive volume range
248/// - MaxVIterations <int> Maximum number of iterations for adaptive volume range
249/// - InitialScale <float> Initial scale for adaptive volume range
250/// - GaussSigma <float> Width with respect to the volume size of Gaussian kernel estimator
251
253{
254 DeclareOptionRef(fVolumeRange="Adaptive", "VolumeRangeMode", "Method to determine volume size");
255 AddPreDefVal(TString("Unscaled"));
256 AddPreDefVal(TString("MinMax"));
257 AddPreDefVal(TString("RMS"));
258 AddPreDefVal(TString("Adaptive"));
259 AddPreDefVal(TString("kNN"));
260
261 DeclareOptionRef(fKernelString="Box", "KernelEstimator", "Kernel estimation function");
262 AddPreDefVal(TString("Box"));
263 AddPreDefVal(TString("Sphere"));
264 AddPreDefVal(TString("Teepee"));
265 AddPreDefVal(TString("Gauss"));
266 AddPreDefVal(TString("Sinc3"));
267 AddPreDefVal(TString("Sinc5"));
268 AddPreDefVal(TString("Sinc7"));
269 AddPreDefVal(TString("Sinc9"));
270 AddPreDefVal(TString("Sinc11"));
271 AddPreDefVal(TString("Lanczos2"));
272 AddPreDefVal(TString("Lanczos3"));
273 AddPreDefVal(TString("Lanczos5"));
274 AddPreDefVal(TString("Lanczos8"));
275 AddPreDefVal(TString("Trim"));
276
277 DeclareOptionRef(fDeltaFrac , "DeltaFrac", "nEventsMin/Max for minmax and rms volume range");
278 DeclareOptionRef(fNEventsMin , "NEventsMin", "nEventsMin for adaptive volume range");
279 DeclareOptionRef(fNEventsMax , "NEventsMax", "nEventsMax for adaptive volume range");
280 DeclareOptionRef(fMaxVIterations, "MaxVIterations", "MaxVIterations for adaptive volume range");
281 DeclareOptionRef(fInitialScale , "InitialScale", "InitialScale for adaptive volume range");
282 DeclareOptionRef(fGaussSigma , "GaussSigma", "Width (wrt volume size) of Gaussian kernel estimator");
283 DeclareOptionRef(fNormTree , "NormTree", "Normalize binary search tree");
284}
285
286////////////////////////////////////////////////////////////////////////////////
287/// process the options specified by the user
288
290{
291 if (IgnoreEventsWithNegWeightsInTraining()) {
292 Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
293 << GetMethodTypeName()
294 << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
295 << Endl;
296 }
297
298 fGaussSigmaNorm = fGaussSigma; // * TMath::Sqrt( Double_t(GetNvar()) );
299
300 fVRangeMode = MethodPDERS::kUnsupported;
301
302 if (fVolumeRange == "MinMax" ) fVRangeMode = kMinMax;
303 else if (fVolumeRange == "RMS" ) fVRangeMode = kRMS;
304 else if (fVolumeRange == "Adaptive" ) fVRangeMode = kAdaptive;
305 else if (fVolumeRange == "Unscaled" ) fVRangeMode = kUnscaled;
306 else if (fVolumeRange == "kNN" ) fVRangeMode = kkNN;
307 else {
308 Log() << kFATAL << "VolumeRangeMode parameter '" << fVolumeRange << "' unknown" << Endl;
309 }
310
311 if (fKernelString == "Box" ) fKernelEstimator = kBox;
312 else if (fKernelString == "Sphere" ) fKernelEstimator = kSphere;
313 else if (fKernelString == "Teepee" ) fKernelEstimator = kTeepee;
314 else if (fKernelString == "Gauss" ) fKernelEstimator = kGauss;
315 else if (fKernelString == "Sinc3" ) fKernelEstimator = kSinc3;
316 else if (fKernelString == "Sinc5" ) fKernelEstimator = kSinc5;
317 else if (fKernelString == "Sinc7" ) fKernelEstimator = kSinc7;
318 else if (fKernelString == "Sinc9" ) fKernelEstimator = kSinc9;
319 else if (fKernelString == "Sinc11" ) fKernelEstimator = kSinc11;
320 else if (fKernelString == "Lanczos2" ) fKernelEstimator = kLanczos2;
321 else if (fKernelString == "Lanczos3" ) fKernelEstimator = kLanczos3;
322 else if (fKernelString == "Lanczos5" ) fKernelEstimator = kLanczos5;
323 else if (fKernelString == "Lanczos8" ) fKernelEstimator = kLanczos8;
324 else if (fKernelString == "Trim" ) fKernelEstimator = kTrim;
325 else {
326 Log() << kFATAL << "KernelEstimator parameter '" << fKernelString << "' unknown" << Endl;
327 }
328
329 // TODO: Add parameter validation
330
331 Log() << kVERBOSE << "interpreted option string: vRangeMethod: '"
332 << (const char*)((fVRangeMode == kMinMax) ? "MinMax" :
333 (fVRangeMode == kUnscaled) ? "Unscaled" :
334 (fVRangeMode == kRMS ) ? "RMS" : "Adaptive") << "'" << Endl;
335 if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
336 Log() << kVERBOSE << "deltaFrac: " << fDeltaFrac << Endl;
337 else
338 Log() << kVERBOSE << "nEventsMin/Max, maxVIterations, initialScale: "
339 << fNEventsMin << " " << fNEventsMax
340 << " " << fMaxVIterations << " " << fInitialScale << Endl;
341 Log() << kVERBOSE << "KernelEstimator = " << fKernelString << Endl;
342}
343
344////////////////////////////////////////////////////////////////////////////////
345/// this is a dummy training: the preparation work to do is the construction
346/// of the binary tree as a pointer chain. It is easier to directly save the
347/// trainingTree in the weight file, and to rebuild the binary tree in the
348/// test phase from scratch
349
351{
352 if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with PDERS; "
353 << "please remove the option from the configuration string, or "
354 << "use \"!Normalise\""
355 << Endl;
356
357 CreateBinarySearchTree( Types::kTraining );
358
359 CalcAverages();
360 SetVolumeElement();
361
362 fInitializedVolumeEle = kTRUE;
363 ExitFromTraining();
364}
365
366////////////////////////////////////////////////////////////////////////////////
367/// init the size of a volume element using a defined fraction of the
368/// volume containing the entire events
369
371{
372 if (fInitializedVolumeEle == kFALSE) {
373 fInitializedVolumeEle = kTRUE;
374
375 // binary trees must exist
376 assert( fBinaryTree );
377
378 CalcAverages();
379 SetVolumeElement();
380 }
381
382 // cannot determine error
383 NoErrorCalc(err, errUpper);
384
385 return this->CRScalc( *GetEvent() );
386}
387
388////////////////////////////////////////////////////////////////////////////////
389
390const std::vector< Float_t >& TMVA::MethodPDERS::GetRegressionValues()
391{
392 if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>;
393 fRegressionReturnVal->clear();
394 // init the size of a volume element using a defined fraction of the
395 // volume containing the entire events
396 if (fInitializedVolumeEle == kFALSE) {
397 fInitializedVolumeEle = kTRUE;
398
399 // binary trees must exist
400 assert( fBinaryTree );
401
402 CalcAverages();
403
404 SetVolumeElement();
405 }
406
407 const Event* ev = GetEvent();
408 this->RRScalc( *ev, fRegressionReturnVal );
409
410 Event * evT = new Event(*ev);
411 UInt_t ivar = 0;
412 for (std::vector<Float_t>::iterator it = fRegressionReturnVal->begin(); it != fRegressionReturnVal->end(); ++it ) {
413 evT->SetTarget(ivar,(*it));
414 ivar++;
415 }
416
417 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
418 fRegressionReturnVal->clear();
419
420 for (ivar = 0; ivar<evT2->GetNTargets(); ivar++) {
421 fRegressionReturnVal->push_back(evT2->GetTarget(ivar));
422 }
423
424 delete evT;
425
426
427 return (*fRegressionReturnVal);
428}
429
430////////////////////////////////////////////////////////////////////////////////
431/// compute also average RMS values required for adaptive Gaussian
432
434{
435 if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN ) {
436 fAverageRMS.clear();
437 fBinaryTree->CalcStatistics();
438
439 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
440 if (!DoRegression()){ //why there are separate rms for signal and background?
441 Float_t rmsS = fBinaryTree->RMS(Types::kSignal, ivar);
442 Float_t rmsB = fBinaryTree->RMS(Types::kBackground, ivar);
443 fAverageRMS.push_back( (rmsS + rmsB)*0.5 );
444 } else {
445 Float_t rms = fBinaryTree->RMS( ivar );
446 fAverageRMS.push_back( rms );
447 }
448 }
449 }
450}
451
452////////////////////////////////////////////////////////////////////////////////
453/// create binary search trees for signal and background
454
456{
457 if (NULL != fBinaryTree) delete fBinaryTree;
458 fBinaryTree = new BinarySearchTree();
459 if (fNormTree) {
460 fBinaryTree->SetNormalize( kTRUE );
461 }
462
463 fBinaryTree->Fill( GetEventCollection(type) );
464
465 if (fNormTree) {
466 fBinaryTree->NormalizeTree();
467 }
468
469 if (!DoRegression()) {
470 // these are the signal and background scales for the weights
471 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
472 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
473
474 Log() << kVERBOSE << "Signal and background scales: " << fScaleS << " " << fScaleB << Endl;
475 }
476}
477
478////////////////////////////////////////////////////////////////////////////////
479/// defines volume dimensions
480
482 if (GetNvar()==0) {
483 Log() << kFATAL << "GetNvar() == 0" << Endl;
484 return;
485 }
486
487 // init relative scales
488 fkNNMin = Int_t(fNEventsMin);
489 fkNNMax = Int_t(fNEventsMax);
490
491 if (fDelta) delete fDelta;
492 if (fShift) delete fShift;
493 fDelta = new std::vector<Float_t>( GetNvar() );
494 fShift = new std::vector<Float_t>( GetNvar() );
495
496 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
497 switch (fVRangeMode) {
498
499 case kRMS:
500 case kkNN:
501 case kAdaptive:
502 // sanity check
503 if (fAverageRMS.size() != GetNvar())
504 Log() << kFATAL << "<SetVolumeElement> RMS not computed: " << fAverageRMS.size() << Endl;
505 (*fDelta)[ivar] = fAverageRMS[ivar]*fDeltaFrac;
506 Log() << kVERBOSE << "delta of var[" << (*fInputVars)[ivar]
507 << "\t]: " << fAverageRMS[ivar]
508 << "\t | comp with |max - min|: " << (GetXmax( ivar ) - GetXmin( ivar ))
509 << Endl;
510 break;
511 case kMinMax:
512 (*fDelta)[ivar] = (GetXmax( ivar ) - GetXmin( ivar ))*fDeltaFrac;
513 break;
514 case kUnscaled:
515 (*fDelta)[ivar] = fDeltaFrac;
516 break;
517 default:
518 Log() << kFATAL << "<SetVolumeElement> unknown range-set mode: "
519 << fVRangeMode << Endl;
520 }
521 (*fShift)[ivar] = 0.5; // volume is centered around test value
522 }
523
524}
525
526////////////////////////////////////////////////////////////////////////////////
527/// Interface to RootFinder
528
530{
531 return ThisPDERS()->GetVolumeContentForRoot( scale );
532}
533
534////////////////////////////////////////////////////////////////////////////////
535/// count number of events in rescaled volume
536
538{
539 Volume v( *fHelpVolume );
540 v.ScaleInterval( scale );
541
542 Double_t count = GetBinaryTree()->SearchVolume( &v );
543
544 v.Delete();
545 return count;
546}
547
549 std::vector<const BinarySearchTreeNode*>& events,
550 Volume *volume )
551{
552 Float_t count = 0;
553
554 // -------------------------------------------------------------------------
555 //
556 // ==== test of volume search =====
557 //
558 // #define TMVA::MethodPDERS__countByHand__Debug__
559
560#ifdef TMVA_MethodPDERS__countByHand__Debug__
561
562 // starting values
563 count = fBinaryTree->SearchVolume( volume );
564
565 Int_t iS = 0, iB = 0;
566 UInt_t nvar = GetNvar();
567 for (UInt_t ievt_=0; ievt_<Data()->GetNTrainingEvents(); ievt_++) {
568 const Event * ev = GetTrainingEvent(ievt_);
569 Bool_t inV;
570 for (Int_t ivar=0; ivar<nvar; ivar++) {
571 Float_t x = ev->GetValue(ivar);
572 inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
573 if (!inV) break;
574 }
575 if (inV) {
576 in++;
577 }
578 }
579 Log() << kVERBOSE << "debug: my test: " << in << Endl;// <- ***********tree
580 Log() << kVERBOSE << "debug: binTree: " << count << Endl << Endl;// <- ***********tree
581
582#endif
583
584 // -------------------------------------------------------------------------
585
586 if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled) { // Constant volume
587
588 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
589 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
590 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
591 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
592 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
593 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
594 }
595 Volume* svolume = new Volume( lb, ub );
596 // starting values
597
598 fBinaryTree->SearchVolume( svolume, &events );
599 }
600 else if (fVRangeMode == kAdaptive) { // adaptive volume
601
602 // -----------------------------------------------------------------------
603
604 // TODO: optimize, perhaps multi stage with broadening limits,
605 // or a different root finding method entirely,
606 if (MethodPDERS_UseFindRoot) {
607
608 // that won't need to search through large volume, where the bottle neck probably is
609
610 fHelpVolume = volume;
611
612 UpdateThis(); // necessary update of static pointer
613 RootFinder rootFinder( this, 0.01, 50, 200, 10 );
614 Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
615
616 volume->ScaleInterval( scale );
617
618 fBinaryTree->SearchVolume( volume, &events );
619
620 fHelpVolume = NULL;
621 }
622 // -----------------------------------------------------------------------
623 else {
624
625 // starting values
626 count = fBinaryTree->SearchVolume( volume );
627
628 Float_t nEventsO = count;
629 Int_t i_=0;
630
631 while (nEventsO < fNEventsMin) { // this isn't a sain start... try again
632 volume->ScaleInterval( 1.15 );
633 count = fBinaryTree->SearchVolume( volume );
634 nEventsO = count;
635 i_++;
636 }
637 if (i_ > 50) Log() << kWARNING << "warning in event: " << e
638 << ": adaptive volume pre-adjustment reached "
639 << ">50 iterations in while loop (" << i_ << ")" << Endl;
640
641 Float_t nEventsN = nEventsO;
642 Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax);
643 Float_t scaleO = 1.0;
644 Float_t scaleN = fInitialScale;
645 Float_t scale = scaleN;
646 Float_t scaleBest = scaleN;
647 Float_t nEventsBest = nEventsN;
648
649 for (Int_t ic=1; ic<fMaxVIterations; ic++) {
650 if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
651
652 // search for events in rescaled volume
653 Volume* v = new Volume( *volume );
654 v->ScaleInterval( scale );
655 nEventsN = fBinaryTree->SearchVolume( v );
656
657 // determine next iteration (linear approximation)
658 if (nEventsN > 1 && nEventsN - nEventsO != 0)
659 if (scaleN - scaleO != 0)
660 scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
661 else
662 scale += (-0.01); // should not actually occur...
663 else
664 scale += 0.5; // use much larger volume
665
666 // save old scale
667 scaleN = scale;
668
669 // take if better (don't accept it if too small number of events)
670 if (TMath::Abs(nEventsN - nEventsE) < TMath::Abs(nEventsBest - nEventsE) &&
671 (nEventsN >= fNEventsMin || nEventsBest < nEventsN)) {
672 nEventsBest = nEventsN;
673 scaleBest = scale;
674 }
675
676 v->Delete();
677 delete v;
678 }
679 else break;
680 }
681
682 // last sanity check
683 nEventsN = nEventsBest;
684 // include "1" to cover float precision
685 if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
686 Log() << kWARNING << "warning in event " << e
687 << ": adaptive volume adjustment reached "
688 << "max. #iterations (" << fMaxVIterations << ")"
689 << "[ nEvents: " << nEventsN << " " << fNEventsMin << " " << fNEventsMax << "]"
690 << Endl;
691
692 volume->ScaleInterval( scaleBest );
693 fBinaryTree->SearchVolume( volume, &events );
694 }
695
696 // end of adaptive method
697
698 } else if (fVRangeMode == kkNN) {
699 Volume v(*volume);
700
701 events.clear();
702 // check number of signals in beginning volume
703 Int_t kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );
704 //if this number is too large return fkNNMax+1
705
706 Int_t t_times = 0; // number of iterations
707
708 while ( !(kNNcount >= fkNNMin && kNNcount <= fkNNMax) ) {
709 if (kNNcount < fkNNMin) { //if we have too less points
710 Float_t scale = 2; //better scale needed
711 volume->ScaleInterval( scale );
712 }
713 else if (kNNcount > fkNNMax) { //if we have too many points
714 Float_t scale = 0.1; //better scale needed
715 volume->ScaleInterval( scale );
716 }
717 events.clear();
718
719 v = *volume ;
720
721 kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 ); //new search
722
723 t_times++;
724
725 if (t_times == fMaxVIterations) {
726 Log() << kWARNING << "warning in event" << e
727 << ": kNN volume adjustment reached "
728 << "max. #iterations (" << fMaxVIterations << ")"
729 << "[ kNN: " << fkNNMin << " " << fkNNMax << Endl;
730 break;
731 }
732 }
733
734 //vector to normalize distance in each dimension
735 Double_t *dim_normalization = new Double_t[GetNvar()];
736 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
737 dim_normalization [ivar] = 1.0 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
738 }
739
740 std::vector<const BinarySearchTreeNode*> tempVector; // temporary vector for events
741
742 if (kNNcount >= fkNNMin) {
743 std::vector<Double_t> *distances = new std::vector<Double_t>( kNNcount );
744
745 //counting the distance for each event
746 for (Int_t j=0;j< Int_t(events.size()) ;j++)
747 (*distances)[j] = GetNormalizedDistance ( e, *events[j], dim_normalization );
748
749 //counting the fkNNMin-th element
750 std::vector<Double_t>::iterator wsk = distances->begin();
751 for (Int_t j=0;j<fkNNMin-1;++j) ++wsk;
752 std::nth_element( distances->begin(), wsk, distances->end() );
753
754 //getting all elements that are closer than fkNNMin-th element
755 //signals
756 for (Int_t j=0;j<Int_t(events.size());j++) {
757 Double_t dist = GetNormalizedDistance( e, *events[j], dim_normalization );
758
759 if (dist <= (*distances)[fkNNMin-1])
760 tempVector.push_back( events[j] );
761 }
762 fMax_distance = (*distances)[fkNNMin-1];
763 delete distances;
764 }
765 delete[] dim_normalization;
766 events = tempVector;
767
768 } else {
769
770 // troubles ahead...
771 Log() << kFATAL << "<GetSample> unknown RangeMode: " << fVRangeMode << Endl;
772 }
773 // -----------------------------------------------------------------------
774}
775
776////////////////////////////////////////////////////////////////////////////////
777
779{
780 std::vector<const BinarySearchTreeNode*> events;
781
782 // computes event weight by counting number of signal and background
783 // events (of reference sample) that are found within given volume
784 // defined by the event
785 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
786 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
787
788 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
789 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
790 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
791 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
792 }
793
794 Volume *volume = new Volume( lb, ub );
795
796 GetSample( e, events, volume );
797 Double_t count = CKernelEstimate( e, events, *volume );
798 delete volume;
799 delete lb;
800 delete ub;
801
802 return count;
803}
804
805////////////////////////////////////////////////////////////////////////////////
806
807void TMVA::MethodPDERS::RRScalc( const Event& e, std::vector<Float_t>* count )
808{
809 std::vector<const BinarySearchTreeNode*> events;
810
811 // computes event weight by counting number of signal and background
812 // events (of reference sample) that are found within given volume
813 // defined by the event
814 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
815 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
816
817 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
818 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
819 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
820 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
821 }
822 Volume *volume = new Volume( lb, ub );
823
824 GetSample( e, events, volume );
825 RKernelEstimate( e, events, *volume, count );
826
827 delete volume;
828
829 return;
830}
831////////////////////////////////////////////////////////////////////////////////
832/// normalization factors so we can work with radius 1 hyperspheres
833
835 std::vector<const BinarySearchTreeNode*>& events, Volume& v )
836{
837 Double_t *dim_normalization = new Double_t[GetNvar()];
838 for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
839 dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
840
841 Double_t pdfSumS = 0;
842 Double_t pdfSumB = 0;
843
844 // Iteration over sample points
845 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
846
847 // First switch to the one dimensional distance
848 Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
849
850 // always working within the hyperelipsoid, except for when we don't
851 // note that rejection ratio goes to 1 as nvar goes to infinity
852 if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
853
854 if ( (*iev)->GetClass()==fSignalClass )
855 pdfSumS += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
856 else
857 pdfSumB += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
858 }
859 pdfSumS = KernelNormalization( pdfSumS < 0. ? 0. : pdfSumS );
860 pdfSumB = KernelNormalization( pdfSumB < 0. ? 0. : pdfSumB );
861
862 delete[] dim_normalization;
863
864 if (pdfSumS < 1e-20 && pdfSumB < 1e-20) return 0.5;
865 if (pdfSumB < 1e-20) return 1.0;
866 if (pdfSumS < 1e-20) return 0.0;
867
868 Float_t r = pdfSumB*fScaleB/(pdfSumS*fScaleS);
869 return 1.0/(r + 1.0); // TODO: propagate errors from here
870}
871
872////////////////////////////////////////////////////////////////////////////////
873/// normalization factors so we can work with radius 1 hyperspheres
874
876 std::vector<const BinarySearchTreeNode*>& events, Volume& v,
877 std::vector<Float_t>* pdfSum )
878{
879 Double_t *dim_normalization = new Double_t[GetNvar()];
880 for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
881 dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
882
883 // std::vector<Float_t> pdfSum;
884 pdfSum->clear();
885 Float_t pdfDiv = 0;
886 fNRegOut = 1; // for now, regression is just for 1 dimension
887
888 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
889 pdfSum->push_back( 0 );
890
891 // Iteration over sample points
892 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
893
894 // First switch to the one dimensional distance
895 Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
896
897 // always working within the hyperelipsoid, except for when we don't
898 // note that rejection ratio goes to 1 as nvar goes to infinity
899 if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
900
901 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++) {
902 pdfSum->at(ivar) += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight() * (*iev)->GetTargets()[ivar];
903 pdfDiv += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
904 }
905 }
906
907 delete[] dim_normalization;
908
909 if (pdfDiv == 0)
910 return;
911
912 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
913 pdfSum->at(ivar) /= pdfDiv;
914
915 return;
916}
917
918////////////////////////////////////////////////////////////////////////////////
919/// from the normalized euclidean distance calculate the distance
920/// for a certain kernel
921
923{
924 switch (fKernelEstimator) {
925 case kBox:
926 case kSphere:
927 return 1;
928 break;
929 case kTeepee:
930 return (1 - normalized_distance);
931 break;
932 case kGauss:
933 return TMath::Gaus( normalized_distance, 0, fGaussSigmaNorm, kFALSE);
934 break;
935 case kSinc3:
936 case kSinc5:
937 case kSinc7:
938 case kSinc9:
939 case kSinc11: {
940 Double_t side_crossings = 2 + ((int) fKernelEstimator) - ((int) kSinc3);
941 return NormSinc (side_crossings * normalized_distance);
942 }
943 break;
944 case kLanczos2:
945 return LanczosFilter (2, normalized_distance);
946 break;
947 case kLanczos3:
948 return LanczosFilter (3, normalized_distance);
949 break;
950 case kLanczos5:
951 return LanczosFilter (5, normalized_distance);
952 break;
953 case kLanczos8:
954 return LanczosFilter (8, normalized_distance);
955 break;
956 case kTrim: {
957 Double_t x = normalized_distance / fMax_distance;
958 x = 1 - x*x*x;
959 return x*x*x;
960 }
961 break;
962 default:
963 Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
964 break;
965 }
966
967 return 0;
968}
969
970////////////////////////////////////////////////////////////////////////////////
971/// Calculating the normalization factor only once (might need a reset at some point.
972/// Can the method be restarted with different params?)
973
975{
976 // Caching jammed to disable function.
977 // It's not really useful after all, badly implemented and untested :-)
978 TTHREAD_TLS(Double_t) ret = 1.0;
979
980 if (ret != 0.0) return ret*pdf;
981
982 // We first normalize by the volume of the hypersphere.
983 switch (fKernelEstimator) {
984 case kBox:
985 case kSphere:
986 ret = 1.;
987 break;
988 case kTeepee:
989 ret = (GetNvar() * (GetNvar() + 1) * TMath::Gamma (((Double_t) GetNvar()) / 2.)) /
990 ( TMath::Power (2., (Double_t) GetNvar() + 1) * TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.));
991 break;
992 case kGauss:
993 // We use full range integral here. Reasonable because of the fast function decay.
994 ret = 1. / TMath::Power ( 2 * TMath::Pi() * fGaussSigmaNorm * fGaussSigmaNorm, ((Double_t) GetNvar()) / 2.);
995 break;
996 case kSinc3:
997 case kSinc5:
998 case kSinc7:
999 case kSinc9:
1000 case kSinc11:
1001 case kLanczos2:
1002 case kLanczos3:
1003 case kLanczos5:
1004 case kLanczos8:
1005 // We use the full range integral here. Reasonable because the central lobe dominates it.
1006 ret = 1 / TMath::Power ( 2., (Double_t) GetNvar() );
1007 break;
1008 default:
1009 Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
1010 }
1011
1012 // Normalizing by the full volume
1013 ret *= ( TMath::Power (2., static_cast<Int_t>(GetNvar())) * TMath::Gamma (1 + (((Double_t) GetNvar()) / 2.)) ) /
1014 TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.);
1015
1016 return ret*pdf;
1017}
1018
1019////////////////////////////////////////////////////////////////////////////////
1020/// We use Euclidian metric here. Might not be best or most efficient.
1021
1023 const BinarySearchTreeNode &sample_event,
1024 Double_t *dim_normalization)
1025{
1026 Double_t ret=0;
1027 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1028 Double_t dist = dim_normalization[ivar] * (sample_event.GetEventV()[ivar] - base_event.GetValue(ivar));
1029 ret += dist*dist;
1030 }
1031 // DD 26.11.2008: bugfix: division by (GetNvar()) was missing for correct normalisation
1032 ret /= GetNvar();
1033 return TMath::Sqrt (ret);
1034}
1035
1036////////////////////////////////////////////////////////////////////////////////
1037/// NormSinc
1038
1040{
1041 if (x < 10e-10 && x > -10e-10) {
1042 return 1; // Poor man's l'Hopital
1043 }
1044
1045 Double_t pix = TMath::Pi() * x;
1046 Double_t sinc = TMath::Sin(pix) / pix;
1047 Double_t ret;
1048
1049 if (GetNvar() % 2)
1050 ret = TMath::Power (sinc, static_cast<Int_t>(GetNvar()));
1051 else
1052 ret = TMath::Abs (sinc) * TMath::Power (sinc, static_cast<Int_t>(GetNvar() - 1));
1053
1054 return ret;
1055}
1056
1057////////////////////////////////////////////////////////////////////////////////
1058/// Lanczos Filter
1059
1061{
1062 if (x < 10e-10 && x > -10e-10) {
1063 return 1; // Poor man's l'Hopital
1064 }
1065
1066 Double_t pix = TMath::Pi() * x;
1067 Double_t pixtimesn = pix * ((Double_t) level);
1068 Double_t lanczos = (TMath::Sin(pix) / pix) * (TMath::Sin(pixtimesn) / pixtimesn);
1069 Double_t ret;
1070
1071 if (GetNvar() % 2) ret = TMath::Power (lanczos, static_cast<Int_t>(GetNvar()));
1072 else ret = TMath::Abs (lanczos) * TMath::Power (lanczos, static_cast<Int_t>(GetNvar() - 1));
1073
1074 return ret;
1075}
1076
1077////////////////////////////////////////////////////////////////////////////////
1078/// statistical error estimate for RS estimator
1079
1081 Float_t sumW2S, Float_t sumW2B ) const
1082{
1083 Float_t c = fScaleB/fScaleS;
1084 Float_t d = countS + c*countB; d *= d;
1085
1086 if (d < 1e-10) return 1; // Error is zero because of B = S = 0
1087
1088 Float_t f = c*c/d/d;
1089 Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;
1090
1091 if (err < 1e-10) return 1; // Error is zero because of B or S = 0
1092
1093 return sqrt(err);
1094}
1095
1096////////////////////////////////////////////////////////////////////////////////
1097/// write weights to xml file
1098
1099void TMVA::MethodPDERS::AddWeightsXMLTo( void* parent ) const
1100{
1101 void* wght = gTools().AddChild(parent, "Weights");
1102 if (fBinaryTree)
1103 fBinaryTree->AddXMLTo(wght);
1104 else
1105 Log() << kFATAL << "Signal and background binary search tree not available" << Endl;
1106 //Log() << kFATAL << "Please implement writing of weights as XML" << Endl;
1107}
1108
1109////////////////////////////////////////////////////////////////////////////////
1110
1112{
1113 if (NULL != fBinaryTree) delete fBinaryTree;
1114 void* treenode = gTools().GetChild(wghtnode);
1115 fBinaryTree = TMVA::BinarySearchTree::CreateFromXML(treenode);
1116 if(!fBinaryTree)
1117 Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1118 if(!fBinaryTree)
1119 Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1120 fBinaryTree->SetPeriode( GetNvar() );
1121 fBinaryTree->CalcStatistics();
1122 fBinaryTree->CountNodes();
1123 if (fBinaryTree->GetSumOfWeights( Types::kSignal ) > 0)
1124 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1125 else fScaleS = 1;
1126 if (fBinaryTree->GetSumOfWeights( Types::kBackground ) > 0)
1127 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1128 else fScaleB = 1;
1129 Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1130 CalcAverages();
1131 SetVolumeElement();
1132 fInitializedVolumeEle = kTRUE;
1133}
1134
1135////////////////////////////////////////////////////////////////////////////////
1136/// read weight info from file
1137
1139{
1140 if (NULL != fBinaryTree) delete fBinaryTree;
1141
1142 fBinaryTree = new BinarySearchTree();
1143
1144 istr >> *fBinaryTree;
1145
1146 fBinaryTree->SetPeriode( GetNvar() );
1147
1148 fBinaryTree->CalcStatistics();
1149
1150 fBinaryTree->CountNodes();
1151
1152 // these are the signal and background scales for the weights
1153 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1154 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1155
1156 Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1157
1158 CalcAverages();
1159
1160 SetVolumeElement();
1161
1162 fInitializedVolumeEle = kTRUE;
1163}
1164
1165////////////////////////////////////////////////////////////////////////////////
1166/// write training sample (TTree) to file
1167
1169{
1170}
1171
1172////////////////////////////////////////////////////////////////////////////////
1173/// read training sample from file
1174
1176{
1177}
1178
1179////////////////////////////////////////////////////////////////////////////////
1180/// static pointer to this object
1181
1183{
1184 return GetMethodPDERSThreadLocal();
1185}
1186////////////////////////////////////////////////////////////////////////////////
1187/// update static this pointer
1188
1190{
1191 GetMethodPDERSThreadLocal() = this;
1192}
1193
1194////////////////////////////////////////////////////////////////////////////////
1195/// write specific classifier response
1196
1197void TMVA::MethodPDERS::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1198{
1199 fout << " // not implemented for class: \"" << className << "\"" << std::endl;
1200 fout << "};" << std::endl;
1201}
1202
1203////////////////////////////////////////////////////////////////////////////////
1204/// get help message text
1205///
1206/// typical length of text line:
1207/// "|--------------------------------------------------------------|"
1208
1210{
1211 Log() << Endl;
1212 Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1213 Log() << Endl;
1214 Log() << "PDERS is a generalization of the projective likelihood classifier " << Endl;
1215 Log() << "to N dimensions, where N is the number of input variables used." << Endl;
1216 Log() << "In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" << Endl;
1217 Log() << "(k-NN) methods. If the multidimensional PDF for signal and background" << Endl;
1218 Log() << "were known, this classifier would exploit the full information" << Endl;
1219 Log() << "contained in the input variables, and would hence be optimal. In " << Endl;
1220 Log() << "practice however, huge training samples are necessary to sufficiently " << Endl;
1221 Log() << "populate the multidimensional phase space. " << Endl;
1222 Log() << Endl;
1223 Log() << "The simplest implementation of PDERS counts the number of signal" << Endl;
1224 Log() << "and background events in the vicinity of a test event, and returns" << Endl;
1225 Log() << "a weight according to the majority species of the neighboring events." << Endl;
1226 Log() << "A more involved version of PDERS (selected by the option \"KernelEstimator\")" << Endl;
1227 Log() << "uses Kernel estimation methods to approximate the shape of the PDF." << Endl;
1228 Log() << Endl;
1229 Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
1230 Log() << Endl;
1231 Log() << "PDERS can be very powerful in case of strongly non-linear problems, " << Endl;
1232 Log() << "e.g., distinct islands of signal and background regions. Because of " << Endl;
1233 Log() << "the exponential growth of the phase space, it is important to restrict" << Endl;
1234 Log() << "the number of input variables (dimension) to the strictly necessary." << Endl;
1235 Log() << Endl;
1236 Log() << "Note that PDERS is a slowly responding classifier. Moreover, the necessity" << Endl;
1237 Log() << "to store the entire binary tree in memory, to avoid accessing virtual " << Endl;
1238 Log() << "memory, limits the number of training events that can effectively be " << Endl;
1239 Log() << "used to model the multidimensional PDF." << Endl;
1240 Log() << Endl;
1241 Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1242 Log() << Endl;
1243 Log() << "If the PDERS response is found too slow when using the adaptive volume " << Endl;
1244 Log() << "size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" << Endl;
1245 Log() << "to reduce the number of events required in the volume, and/or to enlarge" << Endl;
1246 Log() << "the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" << Endl;
1247 Log() << "to the width (\"GaussSigma\") of the Gaussian kernel (if used)." << Endl;
1248}
@ kBox
Definition Buttons.h:29
#define REGISTER_METHOD(CLASS)
for example
ROOT::R::TRInterface & r
Definition Object.C:4
#define d(i)
Definition RSha256.hxx:102
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define e(i)
Definition RSha256.hxx:103
int Int_t
Definition RtypesCore.h:45
const Bool_t kFALSE
Definition RtypesCore.h:101
bool Bool_t
Definition RtypesCore.h:63
double Double_t
Definition RtypesCore.h:59
float Float_t
Definition RtypesCore.h:57
const Bool_t kTRUE
Definition RtypesCore.h:100
#define ClassImp(name)
Definition Rtypes.h:364
int type
Definition TGX11.cxx:121
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition TFile.h:54
Node for the BinarySearch or Decision Trees.
const std::vector< Float_t > & GetEventV() const
A simple Binary search tree including a volume search method.
static BinarySearchTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML
Class that contains all the data information.
Definition DataSetInfo.h:62
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition Event.cxx:236
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition Event.cxx:367
UInt_t GetNTargets() const
accessor to the number of targets
Definition Event.cxx:327
Float_t GetTarget(UInt_t itgt) const
Definition Event.h:102
Virtual base Class for all MVA method.
Definition MethodBase.h:111
This is a generalization of the above Likelihood methods to dimensions, where is the number of inpu...
Definition MethodPDERS.h:61
void WriteWeightsToStream(TFile &rf) const
write training sample (TTree) to file
void CreateBinarySearchTree(Types::ETreeType type)
create binary search trees for signal and background
BinarySearchTree * fBinaryTree
virtual ~MethodPDERS(void)
destructor
MethodPDERS(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor for the PDERS method
void GetSample(const Event &e, std::vector< const BinarySearchTreeNode * > &events, Volume *volume)
Float_t GetError(Float_t countS, Float_t countB, Float_t sumW2S, Float_t sumW2B) const
statistical error estimate for RS estimator
static MethodPDERS * ThisPDERS(void)
static pointer to this object
Double_t KernelNormalization(Double_t pdf)
Calculating the normalization factor only once (might need a reset at some point.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
init the size of a volume element using a defined fraction of the volume containing the entire events
void ReadWeightsFromXML(void *wghtnode)
void ProcessOptions()
process the options specified by the user
void RRScalc(const Event &, std::vector< Float_t > *count)
void GetHelpMessage() const
get help message text
void UpdateThis()
update static this pointer
void Train(void)
this is a dummy training: the preparation work to do is the construction of the binary tree as a poin...
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
Double_t CRScalc(const Event &)
void DeclareOptions()
define the options (their key words) that can be set in the option string.
void CalcAverages()
compute also average RMS values required for adaptive Gaussian
void RKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &, std::vector< Float_t > *pdfSum)
normalization factors so we can work with radius 1 hyperspheres
void ReadWeightsFromStream(std::istream &istr)
read weight info from file
const std::vector< Float_t > & GetRegressionValues()
Double_t NormSinc(Double_t x)
NormSinc.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
PDERS can handle classification with 2 classes and regression with one or more regression-targets.
void AddWeightsXMLTo(void *parent) const
write weights to xml file
void SetVolumeElement(void)
defines volume dimensions
void Init(void)
default initialisation routine called by all constructors
Double_t LanczosFilter(Int_t level, Double_t x)
Lanczos Filter.
Double_t CKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &)
normalization factors so we can work with radius 1 hyperspheres
Double_t ApplyKernelFunction(Double_t normalized_distance)
from the normalized euclidean distance calculate the distance for a certain kernel
Double_t GetNormalizedDistance(const TMVA::Event &base_event, const BinarySearchTreeNode &sample_event, Double_t *dim_normalization)
We use Euclidian metric here. Might not be best or most efficient.
static Double_t IGetVolumeContentForRoot(Double_t)
Interface to RootFinder.
Double_t GetVolumeContentForRoot(Double_t)
count number of events in rescaled volume
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition RootFinder.h:48
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition Tools.cxx:1124
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:828
void * GetChild(void *parent, const char *childname=0)
get child node
Definition Tools.cxx:1150
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kBackground
Definition Types.h:136
@ kClassification
Definition Types.h:127
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
Volume for BinarySearchTree.
Definition Volume.h:47
void ScaleInterval(Double_t f)
"scale" the volume by symmetrically blowing up the interval in each dimension
Definition Volume.cxx:180
const Bool_t MethodPDERS_UseFindRoot
Basic string class.
Definition TString.h:136
Double_t x[n]
Definition legend1.C:17
create variable transformations
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition TMath.cxx:448
Double_t Sqrt(Double_t x)
Definition TMath.h:641
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition TMath.h:685
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Definition TMath.cxx:348
constexpr Double_t Pi()
Definition TMath.h:37
Double_t Sin(Double_t)
Definition TMath.h:589
Short_t Abs(Short_t d)
Definition TMathBase.h:120