Logo ROOT   6.10/09
Reference Guide
MethodPDERS.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Yair Mahalalel, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /***********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodPDERS *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation *
12  * *
13  * Authors (alphabetical): *
14  * Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Poland *
15  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16  * Kamil Kraszewski <kalq@cern.ch> - IFJ PAN & UJ, Poland *
17  * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18  * Yair Mahalalel <Yair.Mahalalel@cern.ch> - CERN, Switzerland *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  ***********************************************************************************/
31 
32 /*! \class TMVA::MethodPDERS
33 \ingroup TMVA
34 
35 This is a generalization of the (one-dimensional) Likelihood method to \f$ N_{var} \f$
36 dimensions, where \f$ N_{var} \f$ is the number of input variables
37 used in the MVA. If the multi-dimensional probability density functions
38 (PDFs) for signal and background were known, this method contains the entire
39 physical information, and is therefore optimal. Usually, kernel estimation
40 methods are used to approximate the PDFs using the events from the
41 training sample.
42 
43 A very simple probability density estimator (PDE) has been suggested
44 in [hep-ex/0211019](http://arxiv.org/abs/hep-ex/0211019). The
45 PDE for a given test event is obtained from counting the (normalized)
46 number of signal and background (training) events that occur in the
47 "vicinity" of the test event. The volume that describes "vicinity" is
48 user-defined. A [search method based on binary-trees](http://arxiv.org/abs/hep-ex/0211019)
49 is used to effectively reduce the
50 selection time for the range search. Three different volume definitions
51 are optional:
52 
53  - *MinMax:* the volume is defined in each dimension with respect
54  to the full variable range found in the training sample.
55  - *RMS:* the volume is defined in each dimension with respect
56  to the RMS estimated from the training sample.
57  - *Adaptive:* a volume element is defined in each dimension with
58  respect to the RMS estimated from the training sample. The overall
59  scale of the volume element is then determined for each event so
60  that the total number of events confined in the volume be within
61  a user-defined range.
62 
63 The adaptive range search is used by default.
64 */
65 
66 #include "TMVA/MethodPDERS.h"
67 
68 #include "TMVA/BinaryTree.h"
69 #include "TMVA/BinarySearchTree.h"
70 #include "TMVA/Configurable.h"
71 #include "TMVA/ClassifierFactory.h"
72 #include "TMVA/Event.h"
73 #include "TMVA/IMethod.h"
74 #include "TMVA/MethodBase.h"
75 #include "TMVA/MsgLogger.h"
76 #include "TMVA/RootFinder.h"
77 #include "TMVA/Tools.h"
79 #include "TMVA/Types.h"
80 
81 #include "ThreadLocalStorage.h"
82 #include "TBuffer.h"
83 #include "TFile.h"
84 #include "TObjString.h"
85 #include "TMath.h"
86 
87 #include <assert.h>
88 #include <algorithm>
89 
90 namespace TMVA {
// Compile-time switch: kTRUE selects the RootFinder-based volume scaling in
// GetSample(); kFALSE (the default) uses the iterative linear-approximation
// loop instead.
91  const Bool_t MethodPDERS_UseFindRoot = kFALSE;
92 };
93 
94 
// Register PDERS with the TMVA ClassifierFactory so it can be booked by name.
95 REGISTER_METHOD(PDERS)
96 
98 
99 ////////////////////////////////////////////////////////////////////////////////
100 /// standard constructor for the PDERS method
101 
102  TMVA::MethodPDERS::MethodPDERS( const TString& jobName,
103  const TString& methodTitle,
104  DataSetInfo& theData,
105  const TString& theOption) :
106  MethodBase( jobName, Types::kPDERS, methodTitle, theData, theOption),
107  fFcnCall(0),
108  fVRangeMode(kAdaptive),
109  fKernelEstimator(kBox),
110  fDelta(0),
111  fShift(0),
112  fScaleS(0),
113  fScaleB(0),
114  fDeltaFrac(0),
115  fGaussSigma(0),
116  fGaussSigmaNorm(0),
117  fNRegOut(0),
118  fNEventsMin(0),
119  fNEventsMax(0),
120  fMaxVIterations(0),
121  fInitialScale(0),
122  fInitializedVolumeEle(0),
123  fkNNMin(0),
124  fkNNMax(0),
125  fMax_distance(0),
126  fPrinted(0),
127  fNormTree(0)
128 {
129  fHelpVolume = NULL;
130  fBinaryTree = NULL;
131 }
132 
 133 ////////////////////////////////////////////////////////////////////////////////
 134 /// construct MethodPDERS through from file
/// Constructor used when the method is re-created from a stored weight file.
/// NOTE(review): the first signature line (doxygen line 136, taking
/// DataSetInfo&) is missing from this scraped listing -- confirm against the
/// original source.
 135 
 137  const TString& theWeightFile) :
 138  MethodBase( Types::kPDERS, theData, theWeightFile),
 139  fFcnCall(0),
 140  fVRangeMode(kAdaptive),
 141  fKernelEstimator(kBox),
 142  fDelta(0),
 143  fShift(0),
 144  fScaleS(0),
 145  fScaleB(0),
 146  fDeltaFrac(0),
 147  fGaussSigma(0),
 148  fGaussSigmaNorm(0),
 149  fNRegOut(0),
 150  fNEventsMin(0),
 151  fNEventsMax(0),
 152  fMaxVIterations(0),
 153  fInitialScale(0),
 154  fInitializedVolumeEle(0),
 155  fkNNMin(0),
 156  fkNNMax(0),
 157  fMax_distance(0),
 158  fPrinted(0),
 159  fNormTree(0)
 160 {
// raw owning pointers; released in the destructor / CreateBinarySearchTree()
 161  fHelpVolume = NULL;
 162  fBinaryTree = NULL;
 163 }
164 
 165 ////////////////////////////////////////////////////////////////////////////////
 166 /// PDERS can handle classification with 2 classes and regression with one or more regression-targets
/// Returns kTRUE only for (classification, exactly 2 classes) or any
/// regression request; everything else is rejected.
/// NOTE(review): the signature line (doxygen line 168) is missing from this
/// scraped listing.
 167 
 169 {
 170  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
 171  if (type == Types::kRegression) return kTRUE;
 172  return kFALSE;
 173 }
174 
 175 ////////////////////////////////////////////////////////////////////////////////
 176 /// default initialisation routine called by all constructors
/// Installs the working defaults for the volume definition and the adaptive
/// range search; user options set in DeclareOptions()/ProcessOptions()
/// override these values.
/// NOTE(review): the signature line and a few default-assignment lines
/// (doxygen lines 178, 186-187, 197-200) are missing from this listing.
 177 
 179 {
 180  fBinaryTree = NULL;
 181 
// refresh the static this-pointer used by the RootFinder callback
 182  UpdateThis();
 183 
 184  // default options
 185  fDeltaFrac = 3.0;
 188 
 189  // special options for Adaptive mode
 190  fNEventsMin = 100;
 191  fNEventsMax = 200;
 192  fMaxVIterations = 150;
 193  fInitialScale = 0.99;
 194  fGaussSigma = 0.1;
 195  fNormTree = kFALSE;
 196 
 199 
 201  fAverageRMS.clear();
 202 
 203  // the minimum requirement to declare an event signal-like
 204  SetSignalReferenceCut( 0.0 );
 205 }
206 
 207 ////////////////////////////////////////////////////////////////////////////////
 208 /// destructor
/// Releases the heap-allocated volume-element vectors and the binary search
/// tree owned by this method.
/// NOTE(review): the signature line (doxygen line 210) is missing from this
/// scraped listing.
 209 
 211 {
 212  if (fDelta) delete fDelta;
 213  if (fShift) delete fShift;
 214 
 215  if (NULL != fBinaryTree) delete fBinaryTree;
 216 }
217 
 218 ////////////////////////////////////////////////////////////////////////////////
 219 /// define the options (their key words) that can be set in the option string.
 220 ///
/// known options:
 222 /// - VolumeRangeMode <string> Method to determine volume range
 223 /// available values are:
 224 /// - MinMax
 225 /// - Unscaled
 226 /// - RMS
 227 /// - kNN
 228 /// - Adaptive <default>
 229 ///
 230 /// - KernelEstimator <string> Kernel estimation function
 231 /// available values are:
 232 /// - Box <default>
 233 /// - Sphere
 234 /// - Teepee
 235 /// - Gauss
 236 /// - Sinc3
 237 /// - Sinc5
 238 /// - Sinc7
 239 /// - Sinc9
 240 /// - Sinc11
 241 /// - Lanczos2
 242 /// - Lanczos3
 243 /// - Lanczos5
 244 /// - Lanczos8
 245 /// - Trim
 246 ///
 247 /// - DeltaFrac <float> Ratio of #EventsMin/#EventsMax for MinMax and RMS volume range
 248 /// - NEventsMin <int> Minimum number of events for adaptive volume range
 249 /// - NEventsMax <int> Maximum number of events for adaptive volume range
 250 /// - MaxVIterations <int> Maximum number of iterations for adaptive volume range
 251 /// - InitialScale <float> Initial scale for adaptive volume range
 252 /// - GaussSigma <float> Width with respect to the volume size of Gaussian kernel estimator
/// NOTE(review): the signature line (doxygen line 254) is missing from this
/// scraped listing.
 253 
 255 {
// the string values registered here are mapped onto enums in ProcessOptions()
 256  DeclareOptionRef(fVolumeRange="Adaptive", "VolumeRangeMode", "Method to determine volume size");
 257  AddPreDefVal(TString("Unscaled"));
 258  AddPreDefVal(TString("MinMax"));
 259  AddPreDefVal(TString("RMS"));
 260  AddPreDefVal(TString("Adaptive"));
 261  AddPreDefVal(TString("kNN"));
 262 
 263  DeclareOptionRef(fKernelString="Box", "KernelEstimator", "Kernel estimation function");
 264  AddPreDefVal(TString("Box"));
 265  AddPreDefVal(TString("Sphere"));
 266  AddPreDefVal(TString("Teepee"));
 267  AddPreDefVal(TString("Gauss"));
 268  AddPreDefVal(TString("Sinc3"));
 269  AddPreDefVal(TString("Sinc5"));
 270  AddPreDefVal(TString("Sinc7"));
 271  AddPreDefVal(TString("Sinc9"));
 272  AddPreDefVal(TString("Sinc11"));
 273  AddPreDefVal(TString("Lanczos2"));
 274  AddPreDefVal(TString("Lanczos3"));
 275  AddPreDefVal(TString("Lanczos5"));
 276  AddPreDefVal(TString("Lanczos8"));
 277  AddPreDefVal(TString("Trim"));
 278 
 279  DeclareOptionRef(fDeltaFrac , "DeltaFrac", "nEventsMin/Max for minmax and rms volume range");
 280  DeclareOptionRef(fNEventsMin , "NEventsMin", "nEventsMin for adaptive volume range");
 281  DeclareOptionRef(fNEventsMax , "NEventsMax", "nEventsMax for adaptive volume range");
 282  DeclareOptionRef(fMaxVIterations, "MaxVIterations", "MaxVIterations for adaptive volume range");
 283  DeclareOptionRef(fInitialScale , "InitialScale", "InitialScale for adaptive volume range");
 284  DeclareOptionRef(fGaussSigma , "GaussSigma", "Width (wrt volume size) of Gaussian kernel estimator");
 285  DeclareOptionRef(fNormTree , "NormTree", "Normalize binary search tree");
 286 }
287 
 288 ////////////////////////////////////////////////////////////////////////////////
 289 /// process the options specified by the user
/// Maps the user-supplied option strings onto the fVRangeMode and
/// fKernelEstimator enums; unknown values abort with kFATAL.
/// NOTE(review): the signature line and the enclosing
/// "if (IgnoreEventsWithNegWeightsInTraining())" condition (doxygen lines
/// 291-293) are missing from this scraped listing -- the kFATAL below belongs
/// to that condition.
 290 
 292 {
 294  Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
 295  << GetMethodTypeName()
 296  << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
 297  << Endl;
 298  }
 299 
// the sqrt(Nvar) scaling is intentionally disabled (kept as a comment)
 300  fGaussSigmaNorm = fGaussSigma; // * TMath::Sqrt( Double_t(GetNvar()) );
 301 
 303 
 304  if (fVolumeRange == "MinMax" ) fVRangeMode = kMinMax;
 305  else if (fVolumeRange == "RMS" ) fVRangeMode = kRMS;
 306  else if (fVolumeRange == "Adaptive" ) fVRangeMode = kAdaptive;
 307  else if (fVolumeRange == "Unscaled" ) fVRangeMode = kUnscaled;
 308  else if (fVolumeRange == "kNN" ) fVRangeMode = kkNN;
 309  else {
 310  Log() << kFATAL << "VolumeRangeMode parameter '" << fVolumeRange << "' unknown" << Endl;
 311  }
 312 
 313  if (fKernelString == "Box" ) fKernelEstimator = kBox;
 314  else if (fKernelString == "Sphere" ) fKernelEstimator = kSphere;
 315  else if (fKernelString == "Teepee" ) fKernelEstimator = kTeepee;
 316  else if (fKernelString == "Gauss" ) fKernelEstimator = kGauss;
 317  else if (fKernelString == "Sinc3" ) fKernelEstimator = kSinc3;
 318  else if (fKernelString == "Sinc5" ) fKernelEstimator = kSinc5;
 319  else if (fKernelString == "Sinc7" ) fKernelEstimator = kSinc7;
 320  else if (fKernelString == "Sinc9" ) fKernelEstimator = kSinc9;
 321  else if (fKernelString == "Sinc11" ) fKernelEstimator = kSinc11;
 322  else if (fKernelString == "Lanczos2" ) fKernelEstimator = kLanczos2;
 323  else if (fKernelString == "Lanczos3" ) fKernelEstimator = kLanczos3;
 324  else if (fKernelString == "Lanczos5" ) fKernelEstimator = kLanczos5;
 325  else if (fKernelString == "Lanczos8" ) fKernelEstimator = kLanczos8;
 326  else if (fKernelString == "Trim" ) fKernelEstimator = kTrim;
 327  else {
 328  Log() << kFATAL << "KernelEstimator parameter '" << fKernelString << "' unknown" << Endl;
 329  }
 330 
 331  // TODO: Add parameter validation
 332 
 333  Log() << kVERBOSE << "interpreted option string: vRangeMethod: '"
 334  << (const char*)((fVRangeMode == kMinMax) ? "MinMax" :
 335  (fVRangeMode == kUnscaled) ? "Unscaled" :
 336  (fVRangeMode == kRMS ) ? "RMS" : "Adaptive") << "'" << Endl;
 337  if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
 338  Log() << kVERBOSE << "deltaFrac: " << fDeltaFrac << Endl;
 339  else
 340  Log() << kVERBOSE << "nEventsMin/Max, maxVIterations, initialScale: "
 341  << fNEventsMin << " " << fNEventsMax
 342  << " " << fMaxVIterations << " " << fInitialScale << Endl;
 343  Log() << kVERBOSE << "KernelEstimator = " << fKernelString << Endl;
 344 }
345 
 346 ////////////////////////////////////////////////////////////////////////////////
 347 /// this is a dummy training: the preparation work to do is the construction
 348 /// of the binary tree as a pointer chain. It is easier to directly save the
 349 /// trainingTree in the weight file, and to rebuild the binary tree in the
 350 /// test phase from scratch
/// NOTE(review): the signature line and the binary-tree creation calls
/// (doxygen lines 352, 359-365) are missing from this scraped listing.
 351 
 353 {
// PDERS applies its own per-variable scaling; global normalisation clashes
 354  if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with PDERS; "
 355  << "please remove the option from the configuration string, or "
 356  << "use \"!Normalise\""
 357  << Endl;
 358 
 360 
 361  CalcAverages();
 363 
 366 }
367 
 368 ////////////////////////////////////////////////////////////////////////////////
 369 /// init the size of a volume element using a defined fraction of the
 370 /// volume containing the entire events
/// Lazily (re)initialises the volume element on first call, then returns the
/// classifier response for the current event via CRScalc().
/// NOTE(review): the signature line and the lines setting up the volume
/// element / fInitializedVolumeEle flag (doxygen lines 372, 375, 381) are
/// missing from this scraped listing.
 371 
 373 {
 374  if (fInitializedVolumeEle == kFALSE) {
 376 
 377  // binary trees must exist
 378  assert( fBinaryTree );
 379 
 380  CalcAverages();
 382  }
 383 
 384  // cannot determine error
 385  NoErrorCalc(err, errUpper);
 386 
 387  return this->CRScalc( *GetEvent() );
 388 }
389 
 390 ////////////////////////////////////////////////////////////////////////////////
/// Returns the regression targets for the current event: computes the
/// kernel-weighted target average with RRScalc(), writes it into a temporary
/// Event, applies the inverse target transformation, and caches the result in
/// fRegressionReturnVal (owned by this method).
/// NOTE(review): two interior lines (doxygen lines 399 and 406, presumably
/// the volume-element setup and the fInitializedVolumeEle flag) are missing
/// from this scraped listing.
 391 
 392 const std::vector< Float_t >& TMVA::MethodPDERS::GetRegressionValues()
 393 {
 394  if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>;
 395  fRegressionReturnVal->clear();
 396  // init the size of a volume element using a defined fraction of the
 397  // volume containing the entire events
 398  if (fInitializedVolumeEle == kFALSE) {
 400 
 401  // binary trees must exist
 402  assert( fBinaryTree );
 403 
 404  CalcAverages();
 405 
 407  }
 408 
 409  const Event* ev = GetEvent();
 410  this->RRScalc( *ev, fRegressionReturnVal );
 411 
// copy the raw regression outputs into a scratch event so the inverse target
// transformation can be applied
 412  Event * evT = new Event(*ev);
 413  UInt_t ivar = 0;
 414  for (std::vector<Float_t>::iterator it = fRegressionReturnVal->begin(); it != fRegressionReturnVal->end(); it++ ) {
 415  evT->SetTarget(ivar,(*it));
 416  ivar++;
 417  }
 418 
 419  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
 420  fRegressionReturnVal->clear();
 421 
 422  for (ivar = 0; ivar<evT2->GetNTargets(); ivar++) {
 423  fRegressionReturnVal->push_back(evT2->GetTarget(ivar));
 424  }
 425 
 426  delete evT;
 427 
 428 
 429  return (*fRegressionReturnVal);
 430 }
431 
 432 ////////////////////////////////////////////////////////////////////////////////
 433 /// compute also average RMS values required for adaptive Gaussian
/// Fills fAverageRMS with a per-variable RMS from the binary search tree:
/// the signal/background mean for classification, the inclusive RMS for
/// regression. Only runs for the RMS/Adaptive/kNN volume modes.
/// NOTE(review): the signature line (doxygen line 435) is missing from this
/// scraped listing.
 434 
 436 {
 437  if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN ) {
 438  fAverageRMS.clear();
 440 
 441  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
 442  if (!DoRegression()){ //why there are separate rms for signal and background?
 443  Float_t rmsS = fBinaryTree->RMS(Types::kSignal, ivar);
 444  Float_t rmsB = fBinaryTree->RMS(Types::kBackground, ivar);
 445  fAverageRMS.push_back( (rmsS + rmsB)*0.5 );
 446  } else {
 447  Float_t rms = fBinaryTree->RMS( ivar );
 448  fAverageRMS.push_back( rms );
 449  }
 450  }
 451  }
 452 }
453 
 454 ////////////////////////////////////////////////////////////////////////////////
 455 /// create binary search trees for signal and background
/// Replaces any existing fBinaryTree and (for classification) derives the
/// signal/background weight scales reported below.
/// NOTE(review): the signature line and the tree construction/normalisation
/// and scale-computation statements (doxygen lines 457, 460, 462, 465, 468,
/// 473-474) are missing from this scraped listing.
 456 
 458 {
 459  if (NULL != fBinaryTree) delete fBinaryTree;
 461  if (fNormTree) {
 463  }
 464 
 466 
 467  if (fNormTree) {
 469  }
 470 
 471  if (!DoRegression()) {
 472  // these are the signal and background scales for the weights
 475 
 476  Log() << kVERBOSE << "Signal and background scales: " << fScaleS << " " << fScaleB << Endl;
 477  }
 478 }
479 
 480 ////////////////////////////////////////////////////////////////////////////////
 481 /// defines volume dimensions
/// (Re)allocates fDelta/fShift and fills a per-variable half-width according
/// to the selected volume-range mode; fShift = 0.5 centres the volume on the
/// test event.
/// NOTE(review): the signature line (doxygen line 483) is missing from this
/// scraped listing.
 482 
 484  if (GetNvar()==0) {
 485  Log() << kFATAL << "GetNvar() == 0" << Endl;
 486  return;
 487  }
 488 
 489  // init relative scales
 492 
 493  if (fDelta) delete fDelta;
 494  if (fShift) delete fShift;
 495  fDelta = new std::vector<Float_t>( GetNvar() );
 496  fShift = new std::vector<Float_t>( GetNvar() );
 497 
 498  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
 499  switch (fVRangeMode) {
 500 
 501  case kRMS:
 502  case kkNN:
 503  case kAdaptive:
 504  // sanity check
 505  if (fAverageRMS.size() != GetNvar())
 506  Log() << kFATAL << "<SetVolumeElement> RMS not computed: " << fAverageRMS.size() << Endl;
 507  (*fDelta)[ivar] = fAverageRMS[ivar]*fDeltaFrac;
 508  Log() << kVERBOSE << "delta of var[" << (*fInputVars)[ivar]
 509  << "\t]: " << fAverageRMS[ivar]
 510  << "\t | comp with |max - min|: " << (GetXmax( ivar ) - GetXmin( ivar ))
 511  << Endl;
 512  break;
 513  case kMinMax:
 514  (*fDelta)[ivar] = (GetXmax( ivar ) - GetXmin( ivar ))*fDeltaFrac;
 515  break;
 516  case kUnscaled:
 517  (*fDelta)[ivar] = fDeltaFrac;
 518  break;
 519  default:
 520  Log() << kFATAL << "<SetVolumeElement> unknown range-set mode: "
 521  << fVRangeMode << Endl;
 522  }
 523  (*fShift)[ivar] = 0.5; // volume is centered around test value
 524  }
 525 
 526 }
527 
 528 ////////////////////////////////////////////////////////////////////////////////
 529 /// Interface to RootFinder
/// Static trampoline: forwards to the current instance (via the static
/// this-pointer maintained by UpdateThis()/ThisPDERS()).
/// NOTE(review): the signature line (doxygen line 531) is missing from this
/// scraped listing.
 530 
 532 {
 533  return ThisPDERS()->GetVolumeContentForRoot( scale );
 534 }
535 
 536 ////////////////////////////////////////////////////////////////////////////////
 537 /// count number of events in rescaled volume
/// Copies fHelpVolume, rescales the copy by 'scale' and returns the number of
/// training events the binary search tree finds inside it.
/// NOTE(review): the signature line (doxygen line 539) is missing from this
/// scraped listing.
 538 
 540 {
 541  Volume v( *fHelpVolume );
 542  v.ScaleInterval( scale );
 543 
 544  Double_t count = GetBinaryTree()->SearchVolume( &v );
 545 
 546  v.Delete();
 547  return count;
 548 }
549 
////////////////////////////////////////////////////////////////////////////////
/// Collect the training events in the neighbourhood of test event e into
/// 'events', adjusting 'volume' according to the selected range mode:
/// - kRMS/kMinMax/kUnscaled: fixed box around the event;
/// - kAdaptive: volume rescaled (RootFinder or iterative linear
///   approximation) until the event count lies in [fNEventsMin, fNEventsMax];
/// - kkNN: volume rescaled until the count lies in [fkNNMin, fkNNMax], then
///   trimmed to the fkNNMin nearest events by normalised distance.
/// NOTE(review): the first signature line (doxygen line 550) is missing from
/// this scraped listing.
 551  std::vector<const BinarySearchTreeNode*>& events,
 552  Volume *volume )
 553 {
 554  Float_t count = 0;
 555 
 556  // -------------------------------------------------------------------------
 557  //
 558  // ==== test of volume search =====
 559  //
 560  // #define TMVA::MethodPDERS__countByHand__Debug__
 561 
 562 #ifdef TMVA_MethodPDERS__countByHand__Debug__
 563 
 564  // starting values
 565  count = fBinaryTree->SearchVolume( volume );
 566 
 567  Int_t iS = 0, iB = 0;
 568  UInt_t nvar = GetNvar();
 569  for (UInt_t ievt_=0; ievt_<Data()->GetNTrainingEvents(); ievt_++) {
 570  const Event * ev = GetTrainingEvent(ievt_);
 571  Bool_t inV;
 572  for (Int_t ivar=0; ivar<nvar; ivar++) {
 573  Float_t x = ev->GetValue(ivar);
 574  inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
 575  if (!inV) break;
 576  }
 577  if (inV) {
 578  in++;
 579  }
 580  }
 581  Log() << kVERBOSE << "debug: my test: " << in << Endl;// <- ***********tree
 582  Log() << kVERBOSE << "debug: binTree: " << count << Endl << Endl;// <- ***********tree
 583 
 584 #endif
 585 
 586  // -------------------------------------------------------------------------
 587 
 588  if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled) { // Constant volume
 589 
// fixed box: lower/upper bound per variable from fDelta/fShift
 590  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
 591  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
 592  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
 593  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
 594  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
 595  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
 596  }
 597  Volume* svolume = new Volume( lb, ub );
 598  // starting values
 599 
 600  fBinaryTree->SearchVolume( svolume, &events );
 601  }
 602  else if (fVRangeMode == kAdaptive) { // adaptive volume
 603 
 604  // -----------------------------------------------------------------------
 605 
 606  // TODO: optimize, perhaps multi stage with broadening limits,
 607  // or a different root finding method entirely,
 608  if (MethodPDERS_UseFindRoot) {
 609 
 610  // that won't need to search through large volume, where the bottle neck probably is
 611 
 612  fHelpVolume = volume;
 613 
 614  UpdateThis(); // necessary update of static pointer
 615  RootFinder rootFinder( this, 0.01, 50, 200, 10 );
 616  Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
 617 
 618  volume->ScaleInterval( scale );
 619 
 620  fBinaryTree->SearchVolume( volume, &events );
 621 
 622  fHelpVolume = NULL;
 623  }
 624  // -----------------------------------------------------------------------
 625  else {
 626 
 627  // starting values
 628  count = fBinaryTree->SearchVolume( volume );
 629 
 630  Float_t nEventsO = count;
 631  Int_t i_=0;
 632 
// grow the volume until it contains at least fNEventsMin events
 633  while (nEventsO < fNEventsMin) { // this isn't a sane start... try again
 634  volume->ScaleInterval( 1.15 );
 635  count = fBinaryTree->SearchVolume( volume );
 636  nEventsO = count;
 637  i_++;
 638  }
 639  if (i_ > 50) Log() << kWARNING << "warning in event: " << e
 640  << ": adaptive volume pre-adjustment reached "
 641  << ">50 iterations in while loop (" << i_ << ")" << Endl;
 642 
// iteratively rescale toward the target count nEventsE using a linear
// approximation of count(scale)
 643  Float_t nEventsN = nEventsO;
 644  Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax);
 645  Float_t scaleO = 1.0;
 646  Float_t scaleN = fInitialScale;
 647  Float_t scale = scaleN;
 648  Float_t scaleBest = scaleN;
 649  Float_t nEventsBest = nEventsN;
 650 
 651  for (Int_t ic=1; ic<fMaxVIterations; ic++) {
 652  if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
 653 
 654  // search for events in rescaled volume
 655  Volume* v = new Volume( *volume );
 656  v->ScaleInterval( scale );
 657  nEventsN = fBinaryTree->SearchVolume( v );
 658 
 659  // determine next iteration (linear approximation)
 660  if (nEventsN > 1 && nEventsN - nEventsO != 0)
 661  if (scaleN - scaleO != 0)
 662  scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
 663  else
 664  scale += (-0.01); // should not actually occur...
 665  else
 666  scale += 0.5; // use much larger volume
 667 
 668  // save old scale
 669  scaleN = scale;
 670 
 671  // take if better (don't accept it if too small number of events)
 672  if (TMath::Abs(nEventsN - nEventsE) < TMath::Abs(nEventsBest - nEventsE) &&
 673  (nEventsN >= fNEventsMin || nEventsBest < nEventsN)) {
 674  nEventsBest = nEventsN;
 675  scaleBest = scale;
 676  }
 677 
 678  v->Delete();
 679  delete v;
 680  }
 681  else break;
 682  }
 683 
 684  // last sanity check
 685  nEventsN = nEventsBest;
 686  // include "1" to cover float precision
 687  if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
 688  Log() << kWARNING << "warning in event " << e
 689  << ": adaptive volume adjustment reached "
 690  << "max. #iterations (" << fMaxVIterations << ")"
 691  << "[ nEvents: " << nEventsN << " " << fNEventsMin << " " << fNEventsMax << "]"
 692  << Endl;
 693 
 694  volume->ScaleInterval( scaleBest );
 695  fBinaryTree->SearchVolume( volume, &events );
 696  }
 697 
 698  // end of adaptive method
 699 
 700  } else if (fVRangeMode == kkNN) {
 701  Volume v(*volume);
 702 
 703  events.clear();
 704  // check number of signals in beginning volume
 705  Int_t kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );
 706  //if this number is too large return fkNNMax+1
 707 
 708  Int_t t_times = 0; // number of iterations
 709 
 710  while ( !(kNNcount >= fkNNMin && kNNcount <= fkNNMax) ) {
 711  if (kNNcount < fkNNMin) { //if we have too few points
 712  Float_t scale = 2; //better scale needed
 713  volume->ScaleInterval( scale );
 714  }
 715  else if (kNNcount > fkNNMax) { //if we have too many points
 716  Float_t scale = 0.1; //better scale needed
 717  volume->ScaleInterval( scale );
 718  }
 719  events.clear();
 720 
 721  v = *volume ;
 722 
 723  kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 ); //new search
 724 
 725  t_times++;
 726 
 727  if (t_times == fMaxVIterations) {
 728  Log() << kWARNING << "warning in event" << e
 729  << ": kNN volume adjustment reached "
 730  << "max. #iterations (" << fMaxVIterations << ")"
 731  << "[ kNN: " << fkNNMin << " " << fkNNMax << Endl;
 732  break;
 733  }
 734  }
 735 
 736  //vector to normalize distance in each dimension
 737  Double_t *dim_normalization = new Double_t[GetNvar()];
 738  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
 739  dim_normalization [ivar] = 1.0 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
 740  }
 741 
 742  std::vector<const BinarySearchTreeNode*> tempVector; // temporary vector for events
 743 
 744  if (kNNcount >= fkNNMin) {
 745  std::vector<Double_t> *distances = new std::vector<Double_t>( kNNcount );
 746 
 747  //counting the distance for each event
 748  for (Int_t j=0;j< Int_t(events.size()) ;j++)
 749  (*distances)[j] = GetNormalizedDistance ( e, *events[j], dim_normalization );
 750 
 751  //counting the fkNNMin-th element
 752  std::vector<Double_t>::iterator wsk = distances->begin();
 753  for (Int_t j=0;j<fkNNMin-1;j++) wsk++;
 754  std::nth_element( distances->begin(), wsk, distances->end() );
 755 
 756  //getting all elements that are closer than fkNNMin-th element
 757  //signals
 758  for (Int_t j=0;j<Int_t(events.size());j++) {
 759  Double_t dist = GetNormalizedDistance( e, *events[j], dim_normalization );
 760 
 761  if (dist <= (*distances)[fkNNMin-1])
 762  tempVector.push_back( events[j] );
 763  }
 764  fMax_distance = (*distances)[fkNNMin-1];
 765  delete distances;
 766  }
 767  delete[] dim_normalization;
 768  events = tempVector;
 769 
 770  } else {
 771 
 772  // troubles ahead...
 773  Log() << kFATAL << "<GetSample> unknown RangeMode: " << fVRangeMode << Endl;
 774  }
 775  // -----------------------------------------------------------------------
 776 }
777 
 778 ////////////////////////////////////////////////////////////////////////////////
/// Classification response for event e: builds a box volume (fDelta/fShift)
/// around the event, collects the neighbouring training events with
/// GetSample() and returns the kernel-estimated signal probability from
/// CKernelEstimate().
/// NOTE(review): the signature line (doxygen line 780) is missing from this
/// scraped listing.
 779 
 781 {
 782  std::vector<const BinarySearchTreeNode*> events;
 783 
 784  // computes event weight by counting number of signal and background
 785  // events (of reference sample) that are found within given volume
 786  // defined by the event
 787  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
 788  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
 789 
 790  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
 791  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
 792  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
 793  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
 794  }
 795 
 796  Volume *volume = new Volume( lb, ub );
 797 
 798  GetSample( e, events, volume );
 799  Double_t count = CKernelEstimate( e, events, *volume );
// the Volume constructor does not take ownership of lb/ub, so all three
// must be freed here
 800  delete volume;
 801  delete lb;
 802  delete ub;
 803 
 804  return count;
 805 }
806 
807 ////////////////////////////////////////////////////////////////////////////////
808 
809 void TMVA::MethodPDERS::RRScalc( const Event& e, std::vector<Float_t>* count )
810 {
811  std::vector<const BinarySearchTreeNode*> events;
812 
813  // computes event weight by counting number of signal and background
814  // events (of reference sample) that are found within given volume
815  // defined by the event
816  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
817  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
818 
819  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
820  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
821  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
822  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
823  }
824  Volume *volume = new Volume( lb, ub );
825 
826  GetSample( e, events, volume );
827  RKernelEstimate( e, events, *volume, count );
828 
829  delete volume;
830 
831  return;
832 }
 833 ////////////////////////////////////////////////////////////////////////////////
 834 /// normalization factors so we can work with radius 1 hyperspheres
/// Kernel-weighted classification estimate: sums kernel weights of the
/// sampled signal and background events and returns the normalised signal
/// probability in [0,1] (0.5 if both sums are negligible).
/// NOTE(review): the first signature line (doxygen line 836, "Double_t
/// TMVA::MethodPDERS::CKernelEstimate( const Event& event,") is missing from
/// this scraped listing.
 835 
 837  std::vector<const BinarySearchTreeNode*>& events, Volume& v )
 838 {
// rescale each dimension so the volume maps onto a radius-1 sphere
 839  Double_t *dim_normalization = new Double_t[GetNvar()];
 840  for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
 841  dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
 842 
 843  Double_t pdfSumS = 0;
 844  Double_t pdfSumB = 0;
 845 
 846  // Iteration over sample points
 847  for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); iev++) {
 848 
 849  // First switch to the one dimensional distance
 850  Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
 851 
 852  // always working within the hyperelipsoid, except for when we don't
 853  // note that rejection ratio goes to 1 as nvar goes to infinity
 854  if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
 855 
 856  if ( (*iev)->GetClass()==fSignalClass )
 857  pdfSumS += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
 858  else
 859  pdfSumB += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
 860  }
// clamp negative sums (possible for oscillating kernels) to zero
 861  pdfSumS = KernelNormalization( pdfSumS < 0. ? 0. : pdfSumS );
 862  pdfSumB = KernelNormalization( pdfSumB < 0. ? 0. : pdfSumB );
 863 
 864  delete[] dim_normalization;
 865 
 866  if (pdfSumS < 1e-20 && pdfSumB < 1e-20) return 0.5;
 867  if (pdfSumB < 1e-20) return 1.0;
 868  if (pdfSumS < 1e-20) return 0.0;
 869 
 870  Float_t r = pdfSumB*fScaleB/(pdfSumS*fScaleS);
 871  return 1.0/(r + 1.0); // TODO: propagate errors from here
 872 }
873 
 874 ////////////////////////////////////////////////////////////////////////////////
 875 /// normalization factors so we can work with radius 1 hyperspheres
/// Kernel-weighted regression estimate: fills *pdfSum with the weighted
/// average of the event targets (per output dimension); leaves the zero
/// entries untouched when no event contributes (pdfDiv == 0).
/// NOTE(review): the first signature line (doxygen line 877, "void
/// TMVA::MethodPDERS::RKernelEstimate( const Event& event,") is missing from
/// this scraped listing.
 876 
 878  std::vector<const BinarySearchTreeNode*>& events, Volume& v,
 879  std::vector<Float_t>* pdfSum )
 880 {
// rescale each dimension so the volume maps onto a radius-1 sphere
 881  Double_t *dim_normalization = new Double_t[GetNvar()];
 882  for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
 883  dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
 884 
 885  // std::vector<Float_t> pdfSum;
 886  pdfSum->clear();
 887  Float_t pdfDiv = 0;
 888  fNRegOut = 1; // for now, regression is just for 1 dimension
 889 
 890  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
 891  pdfSum->push_back( 0 );
 892 
 893  // Iteration over sample points
 894  for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); iev++) {
 895 
 896  // First switch to the one dimensional distance
 897  Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
 898 
 899  // always working within the hyperelipsoid, except for when we don't
 900  // note that rejection ratio goes to 1 as nvar goes to infinity
 901  if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
 902 
 903  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++) {
 904  pdfSum->at(ivar) += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight() * (*iev)->GetTargets()[ivar];
 905  pdfDiv += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
 906  }
 907  }
 908 
 909  delete[] dim_normalization;
 910 
 911  if (pdfDiv == 0)
 912  return;
 913 
 914  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
 915  pdfSum->at(ivar) /= pdfDiv;
 916 
 917  return;
 918 }
919 
 920 ////////////////////////////////////////////////////////////////////////////////
 921 /// from the normalized euclidean distance calculate the distance
 922 /// for a certain kernel
/// Evaluates the configured kernel (fKernelEstimator) at the given
/// normalised distance; unknown enumerators abort with kFATAL.
/// NOTE(review): the signature line (doxygen line 924) is missing from this
/// scraped listing.
 923 
 925 {
 926  switch (fKernelEstimator) {
 927  case kBox:
 928  case kSphere:
 929  return 1;
 930  break;
 931  case kTeepee:
 932  return (1 - normalized_distance);
 933  break;
 934  case kGauss:
 935  return TMath::Gaus( normalized_distance, 0, fGaussSigmaNorm, kFALSE);
 936  break;
 937  case kSinc3:
 938  case kSinc5:
 939  case kSinc7:
 940  case kSinc9:
 941  case kSinc11: {
// kSincN maps to N/2 side crossings: kSinc3 -> 2, kSinc5 -> 3, ...
 942  Double_t side_crossings = 2 + ((int) fKernelEstimator) - ((int) kSinc3);
 943  return NormSinc (side_crossings * normalized_distance);
 944  }
 945  break;
 946  case kLanczos2:
 947  return LanczosFilter (2, normalized_distance);
 948  break;
 949  case kLanczos3:
 950  return LanczosFilter (3, normalized_distance);
 951  break;
 952  case kLanczos5:
 953  return LanczosFilter (5, normalized_distance);
 954  break;
 955  case kLanczos8:
 956  return LanczosFilter (8, normalized_distance);
 957  break;
 958  case kTrim: {
// tricube weight relative to fMax_distance (set by the kNN search)
 959  Double_t x = normalized_distance / fMax_distance;
 960  x = 1 - x*x*x;
 961  return x*x*x;
 962  }
 963  break;
 964  default:
 965  Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
 966  break;
 967  }
 968 
 969  return 0;
 970 }
971 
972 ////////////////////////////////////////////////////////////////////////////////
973 /// Calculating the normalization factor only once (might need a reset at some point.
974 /// Can the method be restarted with different params?)
975 
977 {
978  // Caching jammed to disable function.
979  // It's not really useful after all, badly implemented and untested :-)
980  TTHREAD_TLS(Double_t) ret = 1.0;
981 
982  if (ret != 0.0) return ret*pdf;
983 
984  // We first normalize by the volume of the hypersphere.
985  switch (fKernelEstimator) {
986  case kBox:
987  case kSphere:
988  ret = 1.;
989  break;
990  case kTeepee:
991  ret = (GetNvar() * (GetNvar() + 1) * TMath::Gamma (((Double_t) GetNvar()) / 2.)) /
992  ( TMath::Power (2., (Double_t) GetNvar() + 1) * TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.));
993  break;
994  case kGauss:
995  // We use full range integral here. Reasonable because of the fast function decay.
996  ret = 1. / TMath::Power ( 2 * TMath::Pi() * fGaussSigmaNorm * fGaussSigmaNorm, ((Double_t) GetNvar()) / 2.);
997  break;
998  case kSinc3:
999  case kSinc5:
1000  case kSinc7:
1001  case kSinc9:
1002  case kSinc11:
1003  case kLanczos2:
1004  case kLanczos3:
1005  case kLanczos5:
1006  case kLanczos8:
1007  // We use the full range integral here. Reasonable because the central lobe dominates it.
1008  ret = 1 / TMath::Power ( 2., (Double_t) GetNvar() );
1009  break;
1010  default:
1011  Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
1012  }
1013 
1014  // Normalizing by the full volume
1015  ret *= ( TMath::Power (2., static_cast<Int_t>(GetNvar())) * TMath::Gamma (1 + (((Double_t) GetNvar()) / 2.)) ) /
1016  TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.);
1017 
1018  return ret*pdf;
1019 }
1020 
1021 ////////////////////////////////////////////////////////////////////////////////
1022 /// We use Euclidian metric here. Might not be best or most efficient.
1023 
1025  const BinarySearchTreeNode &sample_event,
1026  Double_t *dim_normalization)
1027 {
1028  Double_t ret=0;
1029  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1030  Double_t dist = dim_normalization[ivar] * (sample_event.GetEventV()[ivar] - base_event.GetValue(ivar));
1031  ret += dist*dist;
1032  }
1033  // DD 26.11.2008: bugfix: division by (GetNvar()) was missing for correct normalisation
1034  ret /= GetNvar();
1035  return TMath::Sqrt (ret);
1036 }
1037 
1038 ////////////////////////////////////////////////////////////////////////////////
1039 /// NormSinc
1040 
1042 {
1043  if (x < 10e-10 && x > -10e-10) {
1044  return 1; // Poor man's l'Hopital
1045  }
1046 
1047  Double_t pix = TMath::Pi() * x;
1048  Double_t sinc = TMath::Sin(pix) / pix;
1049  Double_t ret;
1050 
1051  if (GetNvar() % 2)
1052  ret = TMath::Power (sinc, static_cast<Int_t>(GetNvar()));
1053  else
1054  ret = TMath::Abs (sinc) * TMath::Power (sinc, static_cast<Int_t>(GetNvar() - 1));
1055 
1056  return ret;
1057 }
1058 
1059 ////////////////////////////////////////////////////////////////////////////////
1060 /// Lanczos Filter
1061 
1063 {
1064  if (x < 10e-10 && x > -10e-10) {
1065  return 1; // Poor man's l'Hopital
1066  }
1067 
1068  Double_t pix = TMath::Pi() * x;
1069  Double_t pixtimesn = pix * ((Double_t) level);
1070  Double_t lanczos = (TMath::Sin(pix) / pix) * (TMath::Sin(pixtimesn) / pixtimesn);
1071  Double_t ret;
1072 
1073  if (GetNvar() % 2) ret = TMath::Power (lanczos, static_cast<Int_t>(GetNvar()));
1074  else ret = TMath::Abs (lanczos) * TMath::Power (lanczos, static_cast<Int_t>(GetNvar() - 1));
1075 
1076  return ret;
1077 }
1078 
1079 ////////////////////////////////////////////////////////////////////////////////
1080 /// statistical error estimate for RS estimator
1081 
1083  Float_t sumW2S, Float_t sumW2B ) const
1084 {
1085  Float_t c = fScaleB/fScaleS;
1086  Float_t d = countS + c*countB; d *= d;
1087 
1088  if (d < 1e-10) return 1; // Error is zero because of B = S = 0
1089 
1090  Float_t f = c*c/d/d;
1091  Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;
1092 
1093  if (err < 1e-10) return 1; // Error is zero because of B or S = 0
1094 
1095  return sqrt(err);
1096 }
1097 
1098 ////////////////////////////////////////////////////////////////////////////////
1099 /// write weights to xml file
1100 
1101 void TMVA::MethodPDERS::AddWeightsXMLTo( void* parent ) const
1102 {
1103  void* wght = gTools().AddChild(parent, "Weights");
1104  if (fBinaryTree)
1105  fBinaryTree->AddXMLTo(wght);
1106  else
1107  Log() << kFATAL << "Signal and background binary search tree not available" << Endl;
1108  //Log() << kFATAL << "Please implement writing of weights as XML" << Endl;
1109 }
1110 
1111 ////////////////////////////////////////////////////////////////////////////////
1112 
1114 {
1115  if (NULL != fBinaryTree) delete fBinaryTree;
1116  void* treenode = gTools().GetChild(wghtnode);
1118  if(!fBinaryTree)
1119  Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1120  if(!fBinaryTree)
1121  Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1127  else fScaleS = 1;
1130  else fScaleB = 1;
1131  Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1132  CalcAverages();
1133  SetVolumeElement();
1135 }
1136 
1137 ////////////////////////////////////////////////////////////////////////////////
1138 /// read weight info from file
1139 
1141 {
1142  if (NULL != fBinaryTree) delete fBinaryTree;
1143 
1144  fBinaryTree = new BinarySearchTree();
1145 
1146  istr >> *fBinaryTree;
1147 
1148  fBinaryTree->SetPeriode( GetNvar() );
1149 
1150  fBinaryTree->CalcStatistics();
1151 
1152  fBinaryTree->CountNodes();
1153 
1154  // these are the signal and background scales for the weights
1155  fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1156  fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1157 
1158  Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1159 
1160  CalcAverages();
1161 
1162  SetVolumeElement();
1163 
1165 }
1166 
1167 ////////////////////////////////////////////////////////////////////////////////
1168 /// write training sample (TTree) to file
1169 
1171 {
1172 }
1173 
1174 ////////////////////////////////////////////////////////////////////////////////
1175 /// read training sample from file
1176 
1178 {
1179 }
1180 
1181 ////////////////////////////////////////////////////////////////////////////////
1182 /// static pointer to this object
1183 
1185 {
1186  return GetMethodPDERSThreadLocal();
1187 }
1188 ////////////////////////////////////////////////////////////////////////////////
1189 /// update static this pointer
1190 
1192 {
1193  GetMethodPDERSThreadLocal() = this;
1194 }
1195 
1196 ////////////////////////////////////////////////////////////////////////////////
1197 /// write specific classifier response
1198 
1199 void TMVA::MethodPDERS::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1200 {
1201  fout << " // not implemented for class: \"" << className << "\"" << std::endl;
1202  fout << "};" << std::endl;
1203 }
1204 
1205 ////////////////////////////////////////////////////////////////////////////////
1206 /// get help message text
1207 ///
1208 /// typical length of text line:
1209 /// "|--------------------------------------------------------------|"
1210 
1212 {
1213  Log() << Endl;
1214  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1215  Log() << Endl;
1216  Log() << "PDERS is a generalization of the projective likelihood classifier " << Endl;
1217  Log() << "to N dimensions, where N is the number of input variables used." << Endl;
1218  Log() << "In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" << Endl;
1219  Log() << "(k-NN) methods. If the multidimensional PDF for signal and background" << Endl;
1220  Log() << "were known, this classifier would exploit the full information" << Endl;
1221  Log() << "contained in the input variables, and would hence be optimal. In " << Endl;
1222  Log() << "practice however, huge training samples are necessary to sufficiently " << Endl;
1223  Log() << "populate the multidimensional phase space. " << Endl;
1224  Log() << Endl;
1225  Log() << "The simplest implementation of PDERS counts the number of signal" << Endl;
1226  Log() << "and background events in the vicinity of a test event, and returns" << Endl;
1227  Log() << "a weight according to the majority species of the neighboring events." << Endl;
1228  Log() << "A more involved version of PDERS (selected by the option \"KernelEstimator\")" << Endl;
1229  Log() << "uses Kernel estimation methods to approximate the shape of the PDF." << Endl;
1230  Log() << Endl;
1231  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
1232  Log() << Endl;
1233  Log() << "PDERS can be very powerful in case of strongly non-linear problems, " << Endl;
1234  Log() << "e.g., distinct islands of signal and background regions. Because of " << Endl;
1235  Log() << "the exponential growth of the phase space, it is important to restrict" << Endl;
1236  Log() << "the number of input variables (dimension) to the strictly necessary." << Endl;
1237  Log() << Endl;
1238  Log() << "Note that PDERS is a slowly responding classifier. Moreover, the necessity" << Endl;
1239  Log() << "to store the entire binary tree in memory, to avoid accessing virtual " << Endl;
1240  Log() << "memory, limits the number of training events that can effectively be " << Endl;
1241  Log() << "used to model the multidimensional PDF." << Endl;
1242  Log() << Endl;
1243  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1244  Log() << Endl;
1245  Log() << "If the PDERS response is found too slow when using the adaptive volume " << Endl;
1246  Log() << "size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" << Endl;
1247  Log() << "to reduce the number of events required in the volume, and/or to enlarge" << Endl;
1248  Log() << "the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" << Endl;
1249  Log() << "to the width (\"GaussSigma\") of the Gaussian kernel (if used)." << Endl;
1250 }
Int_t SearchVolumeWithMaxLimit(TMVA::Volume *, std::vector< const TMVA::BinarySearchTreeNode *> *events=0, Int_t=-1)
recursively walk through the daughter nodes and add up all weights of events that lie within the give...
std::vector< Double_t > * fLower
Definition: Volume.h:73
virtual void * AddXMLTo(void *parent) const
add attributes to XML
Definition: BinaryTree.cxx:134
void WriteWeightsToStream(TFile &rf) const
write training sample (TTree) to file
void Delete(void)
delete array of volume bondaries
Definition: Volume.cxx:144
void UpdateThis()
update static this pointer
double dist(Rotation3D const &r1, Rotation3D const &r2)
Definition: 3DDistances.cxx:48
virtual ~MethodPDERS(void)
destructor
Definition: Buttons.h:29
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Singleton class for Global types used by TMVA.
Definition: Types.h:73
Double_t GetNormalizedDistance(const TMVA::Event &base_event, const BinarySearchTreeNode &sample_event, Double_t *dim_normalization)
We use Euclidian metric here. Might not be best or most efficient.
std::vector< Float_t > * fShift
Definition: MethodPDERS.h:178
Float_t fMaxVIterations
Definition: MethodPDERS.h:192
float Float_t
Definition: RtypesCore.h:53
void GetHelpMessage() const
get help message text
BinarySearchTree * fBinaryTree
Definition: MethodPDERS.h:175
UInt_t GetNvar() const
Definition: MethodBase.h:328
Double_t NormSinc(Double_t x)
NormSinc.
MsgLogger & Log() const
Definition: Configurable.h:122
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
EAnalysisType
Definition: Types.h:125
void CreateBinarySearchTree(Types::ETreeType type)
create binary search trees for signal and background
Double_t Fill(const std::vector< TMVA::Event *> &events, const std::vector< Int_t > &theVars, Int_t theType=-1)
create the search tree from the event collection using ONLY the variables specified in "theVars" ...
Virtual base Class for all MVA method.
Definition: MethodBase.h:106
std::vector< Double_t > * fUpper
Definition: Volume.h:74
Basic string class.
Definition: TString.h:129
void AddWeightsXMLTo(void *parent) const
write weights to xml file
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:378
void ScaleInterval(Double_t f)
"scale" the volume by symmetrically blowing up the interval in each dimension
Definition: Volume.cxx:162
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
void RKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode *> &, Volume &, std::vector< Float_t > *pdfSum)
normalization factors so we can work with radius 1 hyperspheres
Float_t fInitialScale
Definition: MethodPDERS.h:193
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
init the size of a volume element using a defined fraction of the volume containing the entire events...
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Definition: TMath.cxx:352
Node for the BinarySearch or Decision Trees.
#define NULL
Definition: RtypesCore.h:88
Double_t GetSumOfWeights(void) const
return the sum of event (node) weights
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1135
Short_t Abs(Short_t d)
Definition: TMathBase.h:108
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:628
Double_t fMax_distance
Definition: MethodPDERS.h:200
Volume for BinarySearchTree.
Definition: Volume.h:48
void GetSample(const Event &e, std::vector< const BinarySearchTreeNode *> &events, Volume *volume)
void NormalizeTree()
Normalisation of tree.
UInt_t fSignalClass
Definition: MethodBase.h:671
void Init(void)
default initialisation routine called by all constructors
static Double_t IGetVolumeContentForRoot(Double_t)
Interface to RootFinder.
double sqrt(double)
Double_t x[n]
Definition: legend1.C:17
void ReadWeightsFromStream(std::istream &istr)
read weight info from file
const Event * GetEvent() const
Definition: MethodBase.h:733
DataSet * Data() const
Definition: MethodBase.h:393
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1161
TString fVolumeRange
Definition: MethodPDERS.h:146
Double_t GetXmin(Int_t ivar) const
Definition: MethodBase.h:340
Float_t GetError(Float_t countS, Float_t countB, Float_t sumW2S, Float_t sumW2B) const
statistical error estimate for RS estimator
Bool_t DoRegression() const
Definition: MethodBase.h:422
Class that contains all the data information.
Definition: DataSetInfo.h:60
constexpr Double_t Pi()
Definition: TMath.h:40
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
void CalcStatistics(TMVA::Node *n=0)
calculate basic statistics (mean, rms for each variable)
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: MethodBase.h:753
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:320
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Definition: RootFinder.cxx:72
void CalcAverages()
compute also average RMS values required for adaptive Gaussian
Double_t CRScalc(const Event &)
Double_t GetXmax(Int_t ivar) const
Definition: MethodBase.h:341
TString fKernelString
Definition: MethodPDERS.h:147
static BinarySearchTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML
void DeclareOptions()
define the options (their key words) that can be set in the option string.
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:97
Double_t fGaussSigma
Definition: MethodPDERS.h:184
TRandom2 r(17)
void SetVolumeElement(void)
defines volume dimensions
SVector< double, 2 > v
Definition: Dict.h:5
Bool_t fInitializedVolumeEle
Definition: MethodPDERS.h:195
BinarySearchTree * GetBinaryTree(void) const
Definition: MethodPDERS.h:114
unsigned int UInt_t
Definition: RtypesCore.h:42
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:360
void SetNormalize(Bool_t norm)
Tools & gTools()
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:452
const Bool_t kFALSE
Definition: RtypesCore.h:92
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:237
void ReadWeightsFromXML(void *wghtnode)
static MethodPDERS * ThisPDERS(void)
static pointer to this object
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:668
This is a generalization of the above Likelihood methods to dimensions, where is the number of inpu...
Definition: MethodPDERS.h:59
void Train(void)
this is a dummy training: the preparation work to do is the construction of the binary tree as a poin...
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
Volume * fHelpVolume
Definition: MethodPDERS.h:110
#define ClassImp(name)
Definition: Rtypes.h:336
double f(double x)
std::vector< Float_t > * fDelta
Definition: MethodPDERS.h:177
double Double_t
Definition: RtypesCore.h:55
Float_t RMS(Types::ESBType sb, UInt_t var)
enum TMVA::MethodPDERS::EKernelEstimator fKernelEstimator
Bool_t IsNormalised() const
Definition: MethodBase.h:478
int type
Definition: TGX11.cxx:120
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
void AddPreDefVal(const T &)
Definition: Configurable.h:168
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
PDERS can handle classification with 2 classes and regression with one or more regression-targets.
void ExitFromTraining()
Definition: MethodBase.h:446
Double_t CKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode *> &, Volume &)
normalization factors so we can work with radius 1 hyperspheres
std::vector< Float_t > fAverageRMS
Definition: MethodPDERS.h:179
UInt_t CountNodes(Node *n=NULL)
return the number of nodes in the tree. (make a new count –> takes time)
Definition: BinaryTree.cxx:103
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
const std::vector< Float_t > & GetEventV() const
const std::vector< Float_t > & GetRegressionValues()
#define REGISTER_METHOD(CLASS)
for example
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition: RootFinder.h:48
Abstract ClassifierFactory template that handles arbitrary types.
TString GetMethodTypeName() const
Definition: MethodBase.h:316
enum TMVA::MethodPDERS::EVolumeRangeMode fVRangeMode
Double_t Sin(Double_t)
Definition: TMath.h:548
void ProcessOptions()
process the options specified by the user
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:579
Double_t GetVolumeContentForRoot(Double_t)
count number of events in rescaled volume
MethodPDERS(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor for the PDERS method
Double_t Sqrt(Double_t x)
Definition: TMath.h:591
Double_t SearchVolume(Volume *, std::vector< const TMVA::BinarySearchTreeNode *> *events=0)
search the whole tree and add up all weights of events that lie within the given volume ...
static MethodPDERS *& GetMethodPDERSThreadLocal()
Definition: MethodPDERS.h:214
Double_t ApplyKernelFunction(Double_t normalized_distance)
from the normalized euclidean distance calculate the distance for a certain kernel ...
const Bool_t kTRUE
Definition: RtypesCore.h:91
Double_t fGaussSigmaNorm
Definition: MethodPDERS.h:185
A simple Binary search tree including a volume search method.
Double_t LanczosFilter(Int_t level, Double_t x)
Lanczos Filter.
void RRScalc(const Event &, std::vector< Float_t > *count)
Double_t KernelNormalization(Double_t pdf)
Calculating the normalization factor only once (might need a reset at some point. ...
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:829
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:348