Logo ROOT  
Reference Guide
MethodPDERS.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Yair Mahalalel, Joerg Stelzer, Helge Voss, Kai Voss
3
4/***********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodPDERS *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation *
12 * *
13 * Authors (alphabetical): *
14 * Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Poland *
15 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16 * Kamil Kraszewski <kalq@cern.ch> - IFJ PAN & UJ, Poland *
17 * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18 * Yair Mahalalel <Yair.Mahalalel@cern.ch> - CERN, Switzerland *
19 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21 * *
22 * Copyright (c) 2005: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * *
27 * Redistribution and use in source and binary forms, with or without *
28 * modification, are permitted according to the terms listed in LICENSE *
29 * (http://tmva.sourceforge.net/LICENSE) *
30 ***********************************************************************************/
31
32/*! \class TMVA::MethodPDERS
33\ingroup TMVA
34
35This is a generalization of the above Likelihood methods to \f$ N_{var} \f$
36dimensions, where \f$ N_{var} \f$ is the number of input variables
37used in the MVA. If the multi-dimensional probability density functions
38(PDFs) for signal and background were known, this method contains the entire
39physical information, and is therefore optimal. Usually, kernel estimation
40methods are used to approximate the PDFs using the events from the
41training sample.
42
43A very simple probability density estimator (PDE) has been suggested
44in [hep-ex/0211019](http://arxiv.org/abs/hep-ex/0211019). The
45PDE for a given test event is obtained from counting the (normalized)
46number of signal and background (training) events that occur in the
47"vicinity" of the test event. The volume that describes "vicinity" is
48user-defined. A [search method based on binary-trees](http://arxiv.org/abs/hep-ex/0211019)
49is used to effectively reduce the
50selection time for the range search. Three different volume definitions
51are optional:
52
53 - *MinMax:* the volume is defined in each dimension with respect
54 to the full variable range found in the training sample.
55 - *RMS:* the volume is defined in each dimension with respect
56 to the RMS estimated from the training sample.
57 - *Adaptive:* a volume element is defined in each dimension with
58 respect to the RMS estimated from the training sample. The overall
59 scale of the volume element is then determined for each event so
60 that the total number of events confined in the volume lies within
61 a user-defined range.
62
63The adaptive range search is used by default.
64*/
65
66#include "TMVA/MethodPDERS.h"
67
68#include "TMVA/BinaryTree.h"
70#include "TMVA/Configurable.h"
72#include "TMVA/Event.h"
73#include "TMVA/IMethod.h"
74#include "TMVA/MethodBase.h"
75#include "TMVA/MsgLogger.h"
76#include "TMVA/RootFinder.h"
77#include "TMVA/Tools.h"
79#include "TMVA/Types.h"
80
81#include "ThreadLocalStorage.h"
82#include "TBuffer.h"
83#include "TFile.h"
84#include "TMath.h"
85
86#include <assert.h>
87#include <algorithm>
88
89namespace TMVA {
   // File-scope switch read by the kAdaptive branch of GetSample():
   // kFALSE selects the iterative volume-scaling loop, kTRUE would route
   // the adaptive range search through the RootFinder-based scaling.
90 const Bool_t MethodPDERS_UseFindRoot = kFALSE;
91};
92
93
// NOTE(review): registration macro — presumably makes "PDERS" bookable via
// the TMVA method factory; macro definition is not visible here, confirm
// against the ClassifierFactory machinery.
94REGISTER_METHOD(PDERS)
95
97
98////////////////////////////////////////////////////////////////////////////////
99/// standard constructor for the PDERS method
100
102 const TString& methodTitle,
103 DataSetInfo& theData,
104 const TString& theOption) :
105 MethodBase( jobName, Types::kPDERS, methodTitle, theData, theOption),
106 fFcnCall(0),
107 fVRangeMode(kAdaptive),
108 fKernelEstimator(kBox),
109 fDelta(0),
110 fShift(0),
111 fScaleS(0),
112 fScaleB(0),
113 fDeltaFrac(0),
114 fGaussSigma(0),
115 fGaussSigmaNorm(0),
116 fNRegOut(0),
117 fNEventsMin(0),
118 fNEventsMax(0),
119 fMaxVIterations(0),
120 fInitialScale(0),
121 fInitializedVolumeEle(0),
122 fkNNMin(0),
123 fkNNMax(0),
124 fMax_distance(0),
125 fPrinted(0),
126 fNormTree(0)
127{
128 fHelpVolume = NULL;
129 fBinaryTree = NULL;
130}
131
132////////////////////////////////////////////////////////////////////////////////
133/// construct MethodPDERS through from file
134
136 const TString& theWeightFile) :
137 MethodBase( Types::kPDERS, theData, theWeightFile),
138 fFcnCall(0),
139 fVRangeMode(kAdaptive),
140 fKernelEstimator(kBox),
141 fDelta(0),
142 fShift(0),
143 fScaleS(0),
144 fScaleB(0),
145 fDeltaFrac(0),
146 fGaussSigma(0),
147 fGaussSigmaNorm(0),
148 fNRegOut(0),
149 fNEventsMin(0),
150 fNEventsMax(0),
151 fMaxVIterations(0),
152 fInitialScale(0),
153 fInitializedVolumeEle(0),
154 fkNNMin(0),
155 fkNNMax(0),
156 fMax_distance(0),
157 fPrinted(0),
158 fNormTree(0)
159{
160 fHelpVolume = NULL;
161 fBinaryTree = NULL;
162}
163
164////////////////////////////////////////////////////////////////////////////////
165/// PDERS can handle classification with 2 classes and regression with one or more regression-targets
166
168{
169 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
170 if (type == Types::kRegression) return kTRUE;
171 return kFALSE;
172}
173
174////////////////////////////////////////////////////////////////////////////////
175/// default initialisation routine called by all constructors
176
178{
179 fBinaryTree = NULL;
180
181 UpdateThis();
182
183 // default options
184 fDeltaFrac = 3.0;
185 fVRangeMode = kAdaptive;
186 fKernelEstimator = kBox;
187
188 // special options for Adaptive mode
189 fNEventsMin = 100;
190 fNEventsMax = 200;
191 fMaxVIterations = 150;
192 fInitialScale = 0.99;
193 fGaussSigma = 0.1;
194 fNormTree = kFALSE;
195
196 fkNNMin = Int_t(fNEventsMin);
197 fkNNMax = Int_t(fNEventsMax);
198
199 fInitializedVolumeEle = kFALSE;
200 fAverageRMS.clear();
201
202 // the minimum requirement to declare an event signal-like
203 SetSignalReferenceCut( 0.0 );
204}
205
206////////////////////////////////////////////////////////////////////////////////
207/// destructor
208
210{
211 if (fDelta) delete fDelta;
212 if (fShift) delete fShift;
213
214 if (NULL != fBinaryTree) delete fBinaryTree;
215}
216
217////////////////////////////////////////////////////////////////////////////////
218/// define the options (their key words) that can be set in the option string.
219///
220/// know options:
221/// - VolumeRangeMode <string> Method to determine volume range
222/// available values are:
223/// - MinMax
224/// - Unscaled
225/// - RMS
226/// - kNN
227/// - Adaptive <default>
228///
229/// - KernelEstimator <string> Kernel estimation function
230/// available values are:
231/// - Box <default>
232/// - Sphere
233/// - Teepee
234/// - Gauss
235/// - Sinc3
236/// - Sinc5
237/// - Sinc7
238/// - Sinc9
239/// - Sinc11
240/// - Lanczos2
241/// - Lanczos3
242/// - Lanczos5
243/// - Lanczos8
244/// - Trim
245///
246/// - DeltaFrac <float> Ratio of #EventsMin/#EventsMax for MinMax and RMS volume range
247/// - NEventsMin <int> Minimum number of events for adaptive volume range
248/// - NEventsMax <int> Maximum number of events for adaptive volume range
249/// - MaxVIterations <int> Maximum number of iterations for adaptive volume range
250/// - InitialScale <float> Initial scale for adaptive volume range
251/// - GaussSigma <float> Width with respect to the volume size of Gaussian kernel estimator
252
254{
255 DeclareOptionRef(fVolumeRange="Adaptive", "VolumeRangeMode", "Method to determine volume size");
256 AddPreDefVal(TString("Unscaled"));
257 AddPreDefVal(TString("MinMax"));
258 AddPreDefVal(TString("RMS"));
259 AddPreDefVal(TString("Adaptive"));
260 AddPreDefVal(TString("kNN"));
261
262 DeclareOptionRef(fKernelString="Box", "KernelEstimator", "Kernel estimation function");
263 AddPreDefVal(TString("Box"));
264 AddPreDefVal(TString("Sphere"));
265 AddPreDefVal(TString("Teepee"));
266 AddPreDefVal(TString("Gauss"));
267 AddPreDefVal(TString("Sinc3"));
268 AddPreDefVal(TString("Sinc5"));
269 AddPreDefVal(TString("Sinc7"));
270 AddPreDefVal(TString("Sinc9"));
271 AddPreDefVal(TString("Sinc11"));
272 AddPreDefVal(TString("Lanczos2"));
273 AddPreDefVal(TString("Lanczos3"));
274 AddPreDefVal(TString("Lanczos5"));
275 AddPreDefVal(TString("Lanczos8"));
276 AddPreDefVal(TString("Trim"));
277
278 DeclareOptionRef(fDeltaFrac , "DeltaFrac", "nEventsMin/Max for minmax and rms volume range");
279 DeclareOptionRef(fNEventsMin , "NEventsMin", "nEventsMin for adaptive volume range");
280 DeclareOptionRef(fNEventsMax , "NEventsMax", "nEventsMax for adaptive volume range");
281 DeclareOptionRef(fMaxVIterations, "MaxVIterations", "MaxVIterations for adaptive volume range");
282 DeclareOptionRef(fInitialScale , "InitialScale", "InitialScale for adaptive volume range");
283 DeclareOptionRef(fGaussSigma , "GaussSigma", "Width (wrt volume size) of Gaussian kernel estimator");
284 DeclareOptionRef(fNormTree , "NormTree", "Normalize binary search tree");
285}
286
287////////////////////////////////////////////////////////////////////////////////
288/// process the options specified by the user
289
291{
292 if (IgnoreEventsWithNegWeightsInTraining()) {
293 Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
294 << GetMethodTypeName()
295 << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
296 << Endl;
297 }
298
299 fGaussSigmaNorm = fGaussSigma; // * TMath::Sqrt( Double_t(GetNvar()) );
300
301 fVRangeMode = MethodPDERS::kUnsupported;
302
303 if (fVolumeRange == "MinMax" ) fVRangeMode = kMinMax;
304 else if (fVolumeRange == "RMS" ) fVRangeMode = kRMS;
305 else if (fVolumeRange == "Adaptive" ) fVRangeMode = kAdaptive;
306 else if (fVolumeRange == "Unscaled" ) fVRangeMode = kUnscaled;
307 else if (fVolumeRange == "kNN" ) fVRangeMode = kkNN;
308 else {
309 Log() << kFATAL << "VolumeRangeMode parameter '" << fVolumeRange << "' unknown" << Endl;
310 }
311
312 if (fKernelString == "Box" ) fKernelEstimator = kBox;
313 else if (fKernelString == "Sphere" ) fKernelEstimator = kSphere;
314 else if (fKernelString == "Teepee" ) fKernelEstimator = kTeepee;
315 else if (fKernelString == "Gauss" ) fKernelEstimator = kGauss;
316 else if (fKernelString == "Sinc3" ) fKernelEstimator = kSinc3;
317 else if (fKernelString == "Sinc5" ) fKernelEstimator = kSinc5;
318 else if (fKernelString == "Sinc7" ) fKernelEstimator = kSinc7;
319 else if (fKernelString == "Sinc9" ) fKernelEstimator = kSinc9;
320 else if (fKernelString == "Sinc11" ) fKernelEstimator = kSinc11;
321 else if (fKernelString == "Lanczos2" ) fKernelEstimator = kLanczos2;
322 else if (fKernelString == "Lanczos3" ) fKernelEstimator = kLanczos3;
323 else if (fKernelString == "Lanczos5" ) fKernelEstimator = kLanczos5;
324 else if (fKernelString == "Lanczos8" ) fKernelEstimator = kLanczos8;
325 else if (fKernelString == "Trim" ) fKernelEstimator = kTrim;
326 else {
327 Log() << kFATAL << "KernelEstimator parameter '" << fKernelString << "' unknown" << Endl;
328 }
329
330 // TODO: Add parameter validation
331
332 Log() << kVERBOSE << "interpreted option string: vRangeMethod: '"
333 << (const char*)((fVRangeMode == kMinMax) ? "MinMax" :
334 (fVRangeMode == kUnscaled) ? "Unscaled" :
335 (fVRangeMode == kRMS ) ? "RMS" : "Adaptive") << "'" << Endl;
336 if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
337 Log() << kVERBOSE << "deltaFrac: " << fDeltaFrac << Endl;
338 else
339 Log() << kVERBOSE << "nEventsMin/Max, maxVIterations, initialScale: "
340 << fNEventsMin << " " << fNEventsMax
341 << " " << fMaxVIterations << " " << fInitialScale << Endl;
342 Log() << kVERBOSE << "KernelEstimator = " << fKernelString << Endl;
343}
344
345////////////////////////////////////////////////////////////////////////////////
346/// this is a dummy training: the preparation work to do is the construction
347/// of the binary tree as a pointer chain. It is easier to directly save the
348/// trainingTree in the weight file, and to rebuild the binary tree in the
349/// test phase from scratch
350
352{
353 if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with PDERS; "
354 << "please remove the option from the configuration string, or "
355 << "use \"!Normalise\""
356 << Endl;
357
358 CreateBinarySearchTree( Types::kTraining );
359
360 CalcAverages();
361 SetVolumeElement();
362
363 fInitializedVolumeEle = kTRUE;
364 ExitFromTraining();
365}
366
367////////////////////////////////////////////////////////////////////////////////
368/// init the size of a volume element using a defined fraction of the
369/// volume containing the entire events
370
372{
373 if (fInitializedVolumeEle == kFALSE) {
374 fInitializedVolumeEle = kTRUE;
375
376 // binary trees must exist
377 assert( fBinaryTree );
378
379 CalcAverages();
380 SetVolumeElement();
381 }
382
383 // cannot determine error
384 NoErrorCalc(err, errUpper);
385
386 return this->CRScalc( *GetEvent() );
387}
388
389////////////////////////////////////////////////////////////////////////////////
390
391const std::vector< Float_t >& TMVA::MethodPDERS::GetRegressionValues()
392{
393 if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>;
394 fRegressionReturnVal->clear();
395 // init the size of a volume element using a defined fraction of the
396 // volume containing the entire events
397 if (fInitializedVolumeEle == kFALSE) {
398 fInitializedVolumeEle = kTRUE;
399
400 // binary trees must exist
401 assert( fBinaryTree );
402
403 CalcAverages();
404
405 SetVolumeElement();
406 }
407
408 const Event* ev = GetEvent();
409 this->RRScalc( *ev, fRegressionReturnVal );
410
411 Event * evT = new Event(*ev);
412 UInt_t ivar = 0;
413 for (std::vector<Float_t>::iterator it = fRegressionReturnVal->begin(); it != fRegressionReturnVal->end(); ++it ) {
414 evT->SetTarget(ivar,(*it));
415 ivar++;
416 }
417
418 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
419 fRegressionReturnVal->clear();
420
421 for (ivar = 0; ivar<evT2->GetNTargets(); ivar++) {
422 fRegressionReturnVal->push_back(evT2->GetTarget(ivar));
423 }
424
425 delete evT;
426
427
428 return (*fRegressionReturnVal);
429}
430
431////////////////////////////////////////////////////////////////////////////////
432/// compute also average RMS values required for adaptive Gaussian
433
435{
436 if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN ) {
437 fAverageRMS.clear();
438 fBinaryTree->CalcStatistics();
439
440 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
441 if (!DoRegression()){ //why there are separate rms for signal and background?
442 Float_t rmsS = fBinaryTree->RMS(Types::kSignal, ivar);
443 Float_t rmsB = fBinaryTree->RMS(Types::kBackground, ivar);
444 fAverageRMS.push_back( (rmsS + rmsB)*0.5 );
445 } else {
446 Float_t rms = fBinaryTree->RMS( ivar );
447 fAverageRMS.push_back( rms );
448 }
449 }
450 }
451}
452
453////////////////////////////////////////////////////////////////////////////////
454/// create binary search trees for signal and background
455
457{
458 if (NULL != fBinaryTree) delete fBinaryTree;
459 fBinaryTree = new BinarySearchTree();
460 if (fNormTree) {
461 fBinaryTree->SetNormalize( kTRUE );
462 }
463
464 fBinaryTree->Fill( GetEventCollection(type) );
465
466 if (fNormTree) {
467 fBinaryTree->NormalizeTree();
468 }
469
470 if (!DoRegression()) {
471 // these are the signal and background scales for the weights
472 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
473 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
474
475 Log() << kVERBOSE << "Signal and background scales: " << fScaleS << " " << fScaleB << Endl;
476 }
477}
478
479////////////////////////////////////////////////////////////////////////////////
480/// defines volume dimensions
481
483 if (GetNvar()==0) {
484 Log() << kFATAL << "GetNvar() == 0" << Endl;
485 return;
486 }
487
488 // init relative scales
489 fkNNMin = Int_t(fNEventsMin);
490 fkNNMax = Int_t(fNEventsMax);
491
492 if (fDelta) delete fDelta;
493 if (fShift) delete fShift;
494 fDelta = new std::vector<Float_t>( GetNvar() );
495 fShift = new std::vector<Float_t>( GetNvar() );
496
497 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
498 switch (fVRangeMode) {
499
500 case kRMS:
501 case kkNN:
502 case kAdaptive:
503 // sanity check
504 if (fAverageRMS.size() != GetNvar())
505 Log() << kFATAL << "<SetVolumeElement> RMS not computed: " << fAverageRMS.size() << Endl;
506 (*fDelta)[ivar] = fAverageRMS[ivar]*fDeltaFrac;
507 Log() << kVERBOSE << "delta of var[" << (*fInputVars)[ivar]
508 << "\t]: " << fAverageRMS[ivar]
509 << "\t | comp with |max - min|: " << (GetXmax( ivar ) - GetXmin( ivar ))
510 << Endl;
511 break;
512 case kMinMax:
513 (*fDelta)[ivar] = (GetXmax( ivar ) - GetXmin( ivar ))*fDeltaFrac;
514 break;
515 case kUnscaled:
516 (*fDelta)[ivar] = fDeltaFrac;
517 break;
518 default:
519 Log() << kFATAL << "<SetVolumeElement> unknown range-set mode: "
520 << fVRangeMode << Endl;
521 }
522 (*fShift)[ivar] = 0.5; // volume is centered around test value
523 }
524
525}
526
527////////////////////////////////////////////////////////////////////////////////
528/// Interface to RootFinder
529
531{
532 return ThisPDERS()->GetVolumeContentForRoot( scale );
533}
534
535////////////////////////////////////////////////////////////////////////////////
536/// count number of events in rescaled volume
537
539{
540 Volume v( *fHelpVolume );
541 v.ScaleInterval( scale );
542
543 Double_t count = GetBinaryTree()->SearchVolume( &v );
544
545 v.Delete();
546 return count;
547}
548
550 std::vector<const BinarySearchTreeNode*>& events,
551 Volume *volume )
552{
553 Float_t count = 0;
554
555 // -------------------------------------------------------------------------
556 //
557 // ==== test of volume search =====
558 //
559 // #define TMVA::MethodPDERS__countByHand__Debug__
560
561#ifdef TMVA_MethodPDERS__countByHand__Debug__
562
563 // starting values
564 count = fBinaryTree->SearchVolume( volume );
565
566 Int_t iS = 0, iB = 0;
567 UInt_t nvar = GetNvar();
568 for (UInt_t ievt_=0; ievt_<Data()->GetNTrainingEvents(); ievt_++) {
569 const Event * ev = GetTrainingEvent(ievt_);
570 Bool_t inV;
571 for (Int_t ivar=0; ivar<nvar; ivar++) {
572 Float_t x = ev->GetValue(ivar);
573 inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
574 if (!inV) break;
575 }
576 if (inV) {
577 in++;
578 }
579 }
580 Log() << kVERBOSE << "debug: my test: " << in << Endl;// <- ***********tree
581 Log() << kVERBOSE << "debug: binTree: " << count << Endl << Endl;// <- ***********tree
582
583#endif
584
585 // -------------------------------------------------------------------------
586
587 if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled) { // Constant volume
588
589 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
590 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
591 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
592 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
593 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
594 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
595 }
596 Volume* svolume = new Volume( lb, ub );
597 // starting values
598
599 fBinaryTree->SearchVolume( svolume, &events );
600 }
601 else if (fVRangeMode == kAdaptive) { // adaptive volume
602
603 // -----------------------------------------------------------------------
604
605 // TODO: optimize, perhaps multi stage with broadening limits,
606 // or a different root finding method entirely,
607 if (MethodPDERS_UseFindRoot) {
608
609 // that won't need to search through large volume, where the bottle neck probably is
610
611 fHelpVolume = volume;
612
613 UpdateThis(); // necessary update of static pointer
614 RootFinder rootFinder( this, 0.01, 50, 200, 10 );
615 Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
616
617 volume->ScaleInterval( scale );
618
619 fBinaryTree->SearchVolume( volume, &events );
620
621 fHelpVolume = NULL;
622 }
623 // -----------------------------------------------------------------------
624 else {
625
626 // starting values
627 count = fBinaryTree->SearchVolume( volume );
628
629 Float_t nEventsO = count;
630 Int_t i_=0;
631
632 while (nEventsO < fNEventsMin) { // this isn't a sain start... try again
633 volume->ScaleInterval( 1.15 );
634 count = fBinaryTree->SearchVolume( volume );
635 nEventsO = count;
636 i_++;
637 }
638 if (i_ > 50) Log() << kWARNING << "warning in event: " << e
639 << ": adaptive volume pre-adjustment reached "
640 << ">50 iterations in while loop (" << i_ << ")" << Endl;
641
642 Float_t nEventsN = nEventsO;
643 Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax);
644 Float_t scaleO = 1.0;
645 Float_t scaleN = fInitialScale;
646 Float_t scale = scaleN;
647 Float_t scaleBest = scaleN;
648 Float_t nEventsBest = nEventsN;
649
650 for (Int_t ic=1; ic<fMaxVIterations; ic++) {
651 if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
652
653 // search for events in rescaled volume
654 Volume* v = new Volume( *volume );
655 v->ScaleInterval( scale );
656 nEventsN = fBinaryTree->SearchVolume( v );
657
658 // determine next iteration (linear approximation)
659 if (nEventsN > 1 && nEventsN - nEventsO != 0)
660 if (scaleN - scaleO != 0)
661 scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
662 else
663 scale += (-0.01); // should not actually occur...
664 else
665 scale += 0.5; // use much larger volume
666
667 // save old scale
668 scaleN = scale;
669
670 // take if better (don't accept it if too small number of events)
671 if (TMath::Abs(nEventsN - nEventsE) < TMath::Abs(nEventsBest - nEventsE) &&
672 (nEventsN >= fNEventsMin || nEventsBest < nEventsN)) {
673 nEventsBest = nEventsN;
674 scaleBest = scale;
675 }
676
677 v->Delete();
678 delete v;
679 }
680 else break;
681 }
682
683 // last sanity check
684 nEventsN = nEventsBest;
685 // include "1" to cover float precision
686 if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
687 Log() << kWARNING << "warning in event " << e
688 << ": adaptive volume adjustment reached "
689 << "max. #iterations (" << fMaxVIterations << ")"
690 << "[ nEvents: " << nEventsN << " " << fNEventsMin << " " << fNEventsMax << "]"
691 << Endl;
692
693 volume->ScaleInterval( scaleBest );
694 fBinaryTree->SearchVolume( volume, &events );
695 }
696
697 // end of adaptive method
698
699 } else if (fVRangeMode == kkNN) {
700 Volume v(*volume);
701
702 events.clear();
703 // check number of signals in beginning volume
704 Int_t kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );
705 //if this number is too large return fkNNMax+1
706
707 Int_t t_times = 0; // number of iterations
708
709 while ( !(kNNcount >= fkNNMin && kNNcount <= fkNNMax) ) {
710 if (kNNcount < fkNNMin) { //if we have too less points
711 Float_t scale = 2; //better scale needed
712 volume->ScaleInterval( scale );
713 }
714 else if (kNNcount > fkNNMax) { //if we have too many points
715 Float_t scale = 0.1; //better scale needed
716 volume->ScaleInterval( scale );
717 }
718 events.clear();
719
720 v = *volume ;
721
722 kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 ); //new search
723
724 t_times++;
725
726 if (t_times == fMaxVIterations) {
727 Log() << kWARNING << "warning in event" << e
728 << ": kNN volume adjustment reached "
729 << "max. #iterations (" << fMaxVIterations << ")"
730 << "[ kNN: " << fkNNMin << " " << fkNNMax << Endl;
731 break;
732 }
733 }
734
735 //vector to normalize distance in each dimension
736 Double_t *dim_normalization = new Double_t[GetNvar()];
737 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
738 dim_normalization [ivar] = 1.0 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
739 }
740
741 std::vector<const BinarySearchTreeNode*> tempVector; // temporary vector for events
742
743 if (kNNcount >= fkNNMin) {
744 std::vector<Double_t> *distances = new std::vector<Double_t>( kNNcount );
745
746 //counting the distance for each event
747 for (Int_t j=0;j< Int_t(events.size()) ;j++)
748 (*distances)[j] = GetNormalizedDistance ( e, *events[j], dim_normalization );
749
750 //counting the fkNNMin-th element
751 std::vector<Double_t>::iterator wsk = distances->begin();
752 for (Int_t j=0;j<fkNNMin-1;++j) ++wsk;
753 std::nth_element( distances->begin(), wsk, distances->end() );
754
755 //getting all elements that are closer than fkNNMin-th element
756 //signals
757 for (Int_t j=0;j<Int_t(events.size());j++) {
758 Double_t dist = GetNormalizedDistance( e, *events[j], dim_normalization );
759
760 if (dist <= (*distances)[fkNNMin-1])
761 tempVector.push_back( events[j] );
762 }
763 fMax_distance = (*distances)[fkNNMin-1];
764 delete distances;
765 }
766 delete[] dim_normalization;
767 events = tempVector;
768
769 } else {
770
771 // troubles ahead...
772 Log() << kFATAL << "<GetSample> unknown RangeMode: " << fVRangeMode << Endl;
773 }
774 // -----------------------------------------------------------------------
775}
776
777////////////////////////////////////////////////////////////////////////////////
778
780{
781 std::vector<const BinarySearchTreeNode*> events;
782
783 // computes event weight by counting number of signal and background
784 // events (of reference sample) that are found within given volume
785 // defined by the event
786 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
787 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
788
789 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
790 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
791 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
792 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
793 }
794
795 Volume *volume = new Volume( lb, ub );
796
797 GetSample( e, events, volume );
798 Double_t count = CKernelEstimate( e, events, *volume );
799 delete volume;
800 delete lb;
801 delete ub;
802
803 return count;
804}
805
806////////////////////////////////////////////////////////////////////////////////
807
808void TMVA::MethodPDERS::RRScalc( const Event& e, std::vector<Float_t>* count )
809{
810 std::vector<const BinarySearchTreeNode*> events;
811
812 // computes event weight by counting number of signal and background
813 // events (of reference sample) that are found within given volume
814 // defined by the event
815 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
816 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
817
818 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
819 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
820 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
821 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
822 }
823 Volume *volume = new Volume( lb, ub );
824
825 GetSample( e, events, volume );
826 RKernelEstimate( e, events, *volume, count );
827
828 delete volume;
829
830 return;
831}
832////////////////////////////////////////////////////////////////////////////////
833/// normalization factors so we can work with radius 1 hyperspheres
834
836 std::vector<const BinarySearchTreeNode*>& events, Volume& v )
837{
838 Double_t *dim_normalization = new Double_t[GetNvar()];
839 for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
840 dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
841
842 Double_t pdfSumS = 0;
843 Double_t pdfSumB = 0;
844
845 // Iteration over sample points
846 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
847
848 // First switch to the one dimensional distance
849 Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
850
851 // always working within the hyperelipsoid, except for when we don't
852 // note that rejection ratio goes to 1 as nvar goes to infinity
853 if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
854
855 if ( (*iev)->GetClass()==fSignalClass )
856 pdfSumS += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
857 else
858 pdfSumB += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
859 }
860 pdfSumS = KernelNormalization( pdfSumS < 0. ? 0. : pdfSumS );
861 pdfSumB = KernelNormalization( pdfSumB < 0. ? 0. : pdfSumB );
862
863 delete[] dim_normalization;
864
865 if (pdfSumS < 1e-20 && pdfSumB < 1e-20) return 0.5;
866 if (pdfSumB < 1e-20) return 1.0;
867 if (pdfSumS < 1e-20) return 0.0;
868
869 Float_t r = pdfSumB*fScaleB/(pdfSumS*fScaleS);
870 return 1.0/(r + 1.0); // TODO: propagate errors from here
871}
872
873////////////////////////////////////////////////////////////////////////////////
874/// normalization factors so we can work with radius 1 hyperspheres
875
877 std::vector<const BinarySearchTreeNode*>& events, Volume& v,
878 std::vector<Float_t>* pdfSum )
879{
880 Double_t *dim_normalization = new Double_t[GetNvar()];
881 for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
882 dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
883
884 // std::vector<Float_t> pdfSum;
885 pdfSum->clear();
886 Float_t pdfDiv = 0;
887 fNRegOut = 1; // for now, regression is just for 1 dimension
888
889 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
890 pdfSum->push_back( 0 );
891
892 // Iteration over sample points
893 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
894
895 // First switch to the one dimensional distance
896 Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
897
898 // always working within the hyperelipsoid, except for when we don't
899 // note that rejection ratio goes to 1 as nvar goes to infinity
900 if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
901
902 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++) {
903 pdfSum->at(ivar) += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight() * (*iev)->GetTargets()[ivar];
904 pdfDiv += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
905 }
906 }
907
908 delete[] dim_normalization;
909
910 if (pdfDiv == 0)
911 return;
912
913 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
914 pdfSum->at(ivar) /= pdfDiv;
915
916 return;
917}
918
919////////////////////////////////////////////////////////////////////////////////
920/// from the normalized euclidean distance calculate the distance
921/// for a certain kernel
922
924{
925 switch (fKernelEstimator) {
926 case kBox:
927 case kSphere:
928 return 1;
929 break;
930 case kTeepee:
931 return (1 - normalized_distance);
932 break;
933 case kGauss:
934 return TMath::Gaus( normalized_distance, 0, fGaussSigmaNorm, kFALSE);
935 break;
936 case kSinc3:
937 case kSinc5:
938 case kSinc7:
939 case kSinc9:
940 case kSinc11: {
941 Double_t side_crossings = 2 + ((int) fKernelEstimator) - ((int) kSinc3);
942 return NormSinc (side_crossings * normalized_distance);
943 }
944 break;
945 case kLanczos2:
946 return LanczosFilter (2, normalized_distance);
947 break;
948 case kLanczos3:
949 return LanczosFilter (3, normalized_distance);
950 break;
951 case kLanczos5:
952 return LanczosFilter (5, normalized_distance);
953 break;
954 case kLanczos8:
955 return LanczosFilter (8, normalized_distance);
956 break;
957 case kTrim: {
958 Double_t x = normalized_distance / fMax_distance;
959 x = 1 - x*x*x;
960 return x*x*x;
961 }
962 break;
963 default:
964 Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
965 break;
966 }
967
968 return 0;
969}
970
971////////////////////////////////////////////////////////////////////////////////
972/// Calculating the normalization factor only once (might need a reset at some point.
973/// Can the method be restarted with different params?)
974
976{
977 // Caching jammed to disable function.
978 // It's not really useful after all, badly implemented and untested :-)
979 TTHREAD_TLS(Double_t) ret = 1.0;
980
981 if (ret != 0.0) return ret*pdf;
982
983 // We first normalize by the volume of the hypersphere.
984 switch (fKernelEstimator) {
985 case kBox:
986 case kSphere:
987 ret = 1.;
988 break;
989 case kTeepee:
990 ret = (GetNvar() * (GetNvar() + 1) * TMath::Gamma (((Double_t) GetNvar()) / 2.)) /
991 ( TMath::Power (2., (Double_t) GetNvar() + 1) * TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.));
992 break;
993 case kGauss:
994 // We use full range integral here. Reasonable because of the fast function decay.
995 ret = 1. / TMath::Power ( 2 * TMath::Pi() * fGaussSigmaNorm * fGaussSigmaNorm, ((Double_t) GetNvar()) / 2.);
996 break;
997 case kSinc3:
998 case kSinc5:
999 case kSinc7:
1000 case kSinc9:
1001 case kSinc11:
1002 case kLanczos2:
1003 case kLanczos3:
1004 case kLanczos5:
1005 case kLanczos8:
1006 // We use the full range integral here. Reasonable because the central lobe dominates it.
1007 ret = 1 / TMath::Power ( 2., (Double_t) GetNvar() );
1008 break;
1009 default:
1010 Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
1011 }
1012
1013 // Normalizing by the full volume
1014 ret *= ( TMath::Power (2., static_cast<Int_t>(GetNvar())) * TMath::Gamma (1 + (((Double_t) GetNvar()) / 2.)) ) /
1015 TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.);
1016
1017 return ret*pdf;
1018}
1019
1020////////////////////////////////////////////////////////////////////////////////
1021/// We use Euclidian metric here. Might not be best or most efficient.
1022
1024 const BinarySearchTreeNode &sample_event,
1025 Double_t *dim_normalization)
1026{
1027 Double_t ret=0;
1028 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1029 Double_t dist = dim_normalization[ivar] * (sample_event.GetEventV()[ivar] - base_event.GetValue(ivar));
1030 ret += dist*dist;
1031 }
1032 // DD 26.11.2008: bugfix: division by (GetNvar()) was missing for correct normalisation
1033 ret /= GetNvar();
1034 return TMath::Sqrt (ret);
1035}
1036
1037////////////////////////////////////////////////////////////////////////////////
1038/// NormSinc
1039
1041{
1042 if (x < 10e-10 && x > -10e-10) {
1043 return 1; // Poor man's l'Hopital
1044 }
1045
1046 Double_t pix = TMath::Pi() * x;
1047 Double_t sinc = TMath::Sin(pix) / pix;
1048 Double_t ret;
1049
1050 if (GetNvar() % 2)
1051 ret = TMath::Power (sinc, static_cast<Int_t>(GetNvar()));
1052 else
1053 ret = TMath::Abs (sinc) * TMath::Power (sinc, static_cast<Int_t>(GetNvar() - 1));
1054
1055 return ret;
1056}
1057
1058////////////////////////////////////////////////////////////////////////////////
1059/// Lanczos Filter
1060
1062{
1063 if (x < 10e-10 && x > -10e-10) {
1064 return 1; // Poor man's l'Hopital
1065 }
1066
1067 Double_t pix = TMath::Pi() * x;
1068 Double_t pixtimesn = pix * ((Double_t) level);
1069 Double_t lanczos = (TMath::Sin(pix) / pix) * (TMath::Sin(pixtimesn) / pixtimesn);
1070 Double_t ret;
1071
1072 if (GetNvar() % 2) ret = TMath::Power (lanczos, static_cast<Int_t>(GetNvar()));
1073 else ret = TMath::Abs (lanczos) * TMath::Power (lanczos, static_cast<Int_t>(GetNvar() - 1));
1074
1075 return ret;
1076}
1077
1078////////////////////////////////////////////////////////////////////////////////
1079/// statistical error estimate for RS estimator
1080
1082 Float_t sumW2S, Float_t sumW2B ) const
1083{
1084 Float_t c = fScaleB/fScaleS;
1085 Float_t d = countS + c*countB; d *= d;
1086
1087 if (d < 1e-10) return 1; // Error is zero because of B = S = 0
1088
1089 Float_t f = c*c/d/d;
1090 Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;
1091
1092 if (err < 1e-10) return 1; // Error is zero because of B or S = 0
1093
1094 return sqrt(err);
1095}
1096
1097////////////////////////////////////////////////////////////////////////////////
1098/// write weights to xml file
1099
1100void TMVA::MethodPDERS::AddWeightsXMLTo( void* parent ) const
1101{
1102 void* wght = gTools().AddChild(parent, "Weights");
1103 if (fBinaryTree)
1104 fBinaryTree->AddXMLTo(wght);
1105 else
1106 Log() << kFATAL << "Signal and background binary search tree not available" << Endl;
1107 //Log() << kFATAL << "Please implement writing of weights as XML" << Endl;
1108}
1109
1110////////////////////////////////////////////////////////////////////////////////
1111
1113{
1114 if (NULL != fBinaryTree) delete fBinaryTree;
1115 void* treenode = gTools().GetChild(wghtnode);
1116 fBinaryTree = TMVA::BinarySearchTree::CreateFromXML(treenode);
1117 if(!fBinaryTree)
1118 Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1119 if(!fBinaryTree)
1120 Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1121 fBinaryTree->SetPeriode( GetNvar() );
1122 fBinaryTree->CalcStatistics();
1123 fBinaryTree->CountNodes();
1124 if (fBinaryTree->GetSumOfWeights( Types::kSignal ) > 0)
1125 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1126 else fScaleS = 1;
1127 if (fBinaryTree->GetSumOfWeights( Types::kBackground ) > 0)
1128 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1129 else fScaleB = 1;
1130 Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1131 CalcAverages();
1132 SetVolumeElement();
1133 fInitializedVolumeEle = kTRUE;
1134}
1135
1136////////////////////////////////////////////////////////////////////////////////
1137/// read weight info from file
1138
1140{
1141 if (NULL != fBinaryTree) delete fBinaryTree;
1142
1143 fBinaryTree = new BinarySearchTree();
1144
1145 istr >> *fBinaryTree;
1146
1147 fBinaryTree->SetPeriode( GetNvar() );
1148
1149 fBinaryTree->CalcStatistics();
1150
1151 fBinaryTree->CountNodes();
1152
1153 // these are the signal and background scales for the weights
1154 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1155 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1156
1157 Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1158
1159 CalcAverages();
1160
1161 SetVolumeElement();
1162
1163 fInitializedVolumeEle = kTRUE;
1164}
1165
1166////////////////////////////////////////////////////////////////////////////////
1167/// write training sample (TTree) to file
1168
1170{
1171}
1172
1173////////////////////////////////////////////////////////////////////////////////
1174/// read training sample from file
1175
1177{
1178}
1179
1180////////////////////////////////////////////////////////////////////////////////
1181/// static pointer to this object
1182
1184{
1185 return GetMethodPDERSThreadLocal();
1186}
1187////////////////////////////////////////////////////////////////////////////////
1188/// update static this pointer
1189
1191{
1192 GetMethodPDERSThreadLocal() = this;
1193}
1194
1195////////////////////////////////////////////////////////////////////////////////
1196/// write specific classifier response
1197
void TMVA::MethodPDERS::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   // Standalone-C++ export is not implemented for PDERS: emit a stub
   // comment and close the class declaration written by the caller.
   fout << " // not implemented for class: \"" << className << "\"" << std::endl;
   fout << "};" << std::endl;
}
1203
1204////////////////////////////////////////////////////////////////////////////////
1205/// get help message text
1206///
1207/// typical length of text line:
1208/// "|--------------------------------------------------------------|"
1209
1211{
1212 Log() << Endl;
1213 Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1214 Log() << Endl;
1215 Log() << "PDERS is a generalization of the projective likelihood classifier " << Endl;
1216 Log() << "to N dimensions, where N is the number of input variables used." << Endl;
1217 Log() << "In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" << Endl;
1218 Log() << "(k-NN) methods. If the multidimensional PDF for signal and background" << Endl;
1219 Log() << "were known, this classifier would exploit the full information" << Endl;
1220 Log() << "contained in the input variables, and would hence be optimal. In " << Endl;
1221 Log() << "practice however, huge training samples are necessary to sufficiently " << Endl;
1222 Log() << "populate the multidimensional phase space. " << Endl;
1223 Log() << Endl;
1224 Log() << "The simplest implementation of PDERS counts the number of signal" << Endl;
1225 Log() << "and background events in the vicinity of a test event, and returns" << Endl;
1226 Log() << "a weight according to the majority species of the neighboring events." << Endl;
1227 Log() << "A more involved version of PDERS (selected by the option \"KernelEstimator\")" << Endl;
1228 Log() << "uses Kernel estimation methods to approximate the shape of the PDF." << Endl;
1229 Log() << Endl;
1230 Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
1231 Log() << Endl;
1232 Log() << "PDERS can be very powerful in case of strongly non-linear problems, " << Endl;
1233 Log() << "e.g., distinct islands of signal and background regions. Because of " << Endl;
1234 Log() << "the exponential growth of the phase space, it is important to restrict" << Endl;
1235 Log() << "the number of input variables (dimension) to the strictly necessary." << Endl;
1236 Log() << Endl;
1237 Log() << "Note that PDERS is a slowly responding classifier. Moreover, the necessity" << Endl;
1238 Log() << "to store the entire binary tree in memory, to avoid accessing virtual " << Endl;
1239 Log() << "memory, limits the number of training events that can effectively be " << Endl;
1240 Log() << "used to model the multidimensional PDF." << Endl;
1241 Log() << Endl;
1242 Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1243 Log() << Endl;
1244 Log() << "If the PDERS response is found too slow when using the adaptive volume " << Endl;
1245 Log() << "size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" << Endl;
1246 Log() << "to reduce the number of events required in the volume, and/or to enlarge" << Endl;
1247 Log() << "the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" << Endl;
1248 Log() << "to the width (\"GaussSigma\") of the Gaussian kernel (if used)." << Endl;
1249}
@ kBox
Definition: Buttons.h:29
#define REGISTER_METHOD(CLASS)
for example
ROOT::R::TRInterface & r
Definition: Object.C:4
#define d(i)
Definition: RSha256.hxx:102
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define e(i)
Definition: RSha256.hxx:103
int Int_t
Definition: RtypesCore.h:43
const Bool_t kFALSE
Definition: RtypesCore.h:90
bool Bool_t
Definition: RtypesCore.h:61
double Double_t
Definition: RtypesCore.h:57
float Float_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:89
#define ClassImp(name)
Definition: Rtypes.h:361
int type
Definition: TGX11.cxx:120
double sqrt(double)
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:53
Node for the BinarySearch or Decision Trees.
const std::vector< Float_t > & GetEventV() const
A simple Binary search tree including a volume search method.
static BinarySearchTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML
Class that contains all the data information.
Definition: DataSetInfo.h:60
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:236
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:359
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:319
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
This is a generalization of the above Likelihood methods to dimensions, where is the number of inpu...
Definition: MethodPDERS.h:60
void WriteWeightsToStream(TFile &rf) const
write training sample (TTree) to file
void CreateBinarySearchTree(Types::ETreeType type)
create binary search trees for signal and background
BinarySearchTree * fBinaryTree
Definition: MethodPDERS.h:176
virtual ~MethodPDERS(void)
destructor
MethodPDERS(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor for the PDERS method
void GetSample(const Event &e, std::vector< const BinarySearchTreeNode * > &events, Volume *volume)
Float_t GetError(Float_t countS, Float_t countB, Float_t sumW2S, Float_t sumW2B) const
statistical error estimate for RS estimator
static MethodPDERS * ThisPDERS(void)
static pointer to this object
Double_t KernelNormalization(Double_t pdf)
Calculating the normalization factor only once (might need a reset at some point.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
init the size of a volume element using a defined fraction of the volume containing the entire events
void ReadWeightsFromXML(void *wghtnode)
void ProcessOptions()
process the options specified by the user
void RRScalc(const Event &, std::vector< Float_t > *count)
void GetHelpMessage() const
get help message text
void UpdateThis()
update static this pointer
void Train(void)
this is a dummy training: the preparation work to do is the construction of the binary tree as a poin...
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
Double_t CRScalc(const Event &)
void DeclareOptions()
define the options (their key words) that can be set in the option string.
void CalcAverages()
compute also average RMS values required for adaptive Gaussian
void RKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &, std::vector< Float_t > *pdfSum)
normalization factors so we can work with radius 1 hyperspheres
void ReadWeightsFromStream(std::istream &istr)
read weight info from file
const std::vector< Float_t > & GetRegressionValues()
Double_t NormSinc(Double_t x)
NormSinc.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
PDERS can handle classification with 2 classes and regression with one or more regression-targets.
void AddWeightsXMLTo(void *parent) const
write weights to xml file
void SetVolumeElement(void)
defines volume dimensions
void Init(void)
default initialisation routine called by all constructors
Double_t LanczosFilter(Int_t level, Double_t x)
Lanczos Filter.
Double_t CKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &)
normalization factors so we can work with radius 1 hyperspheres
Double_t ApplyKernelFunction(Double_t normalized_distance)
from the normalized euclidean distance calculate the distance for a certain kernel
Double_t GetNormalizedDistance(const TMVA::Event &base_event, const BinarySearchTreeNode &sample_event, Double_t *dim_normalization)
We use Euclidian metric here. Might not be best or most efficient.
static Double_t IGetVolumeContentForRoot(Double_t)
Interface to RootFinder.
Double_t GetVolumeContentForRoot(Double_t)
count number of events in rescaled volume
Volume * fHelpVolume
Definition: MethodPDERS.h:111
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition: RootFinder.h:48
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Definition: RootFinder.cxx:72
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1135
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1161
Singleton class for Global types used by TMVA.
Definition: Types.h:73
@ kSignal
Definition: Types.h:136
@ kBackground
Definition: Types.h:137
EAnalysisType
Definition: Types.h:127
@ kClassification
Definition: Types.h:128
@ kRegression
Definition: Types.h:129
@ kTraining
Definition: Types.h:144
Volume for BinarySearchTree.
Definition: Volume.h:48
void ScaleInterval(Double_t f)
"scale" the volume by symmetrically blowing up the interval in each dimension
Definition: Volume.cxx:180
Basic string class.
Definition: TString.h:131
Double_t x[n]
Definition: legend1.C:17
double dist(Rotation3D const &r1, Rotation3D const &r2)
Definition: 3DDistances.cxx:48
create variable transformations
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:448
Double_t Log(Double_t x)
Definition: TMath.h:750
Double_t Sqrt(Double_t x)
Definition: TMath.h:681
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:725
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Definition: TMath.cxx:348
constexpr Double_t Pi()
Definition: TMath.h:38
Double_t Sin(Double_t)
Definition: TMath.h:627
Short_t Abs(Short_t d)
Definition: TMathBase.h:120