ROOT  6.06/09
Reference Guide
MethodPDERS.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Yair Mahalalel, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /***********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodPDERS *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation *
12  * *
13  * Authors (alphabetical): *
14  * Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Poland *
15  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16  * Kamil Kraszewski <kalq@cern.ch> - IFJ PAN & UJ, Poland *
17  * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18  * Yair Mahalalel <Yair.Mahalalel@cern.ch> - CERN, Switzerland *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  ***********************************************************************************/
31 
32 //_______________________________________________________________________
33 // Begin_Html
34 /*
35  This is a generalization of the above Likelihood methods to <i>N</i><sub>var</sub>
36  dimensions, where <i>N</i><sub>var</sub> is the number of input variables
37  used in the MVA. If the multi-dimensional probability density functions
38  (PDFs) for signal and background were known, this method contains the entire
39  physical information, and is therefore optimal. Usually, kernel estimation
40  methods are used to approximate the PDFs using the events from the
41  training sample. <br><p></p>
42 
43  A very simple probability density estimator (PDE) has been suggested
44  in <a href="http://arxiv.org/abs/hep-ex/0211019">hep-ex/0211019</a>. The
45  PDE for a given test event is obtained from counting the (normalized)
46  number of signal and background (training) events that occur in the
47  "vicinity" of the test event. The volume that describes "vicinity" is
48  user-defined. A <a href="http://arxiv.org/abs/hep-ex/0211019">search
49  method based on binary-trees</a> is used to effectively reduce the
50  selection time for the range search. Three different volume definitions
51  are optional: <br><p></p>
52  <ul>
53  <li><u>MinMax:</u>
54  the volume is defined in each dimension with respect
55  to the full variable range found in the training sample. </li>
56  <li><u>RMS:</u>
 57  the volume is defined in each dimension with respect
58  to the RMS estimated from the training sample. </li>
59  <li><u>Adaptive:</u>
 60  a volume element is defined in each dimension with
61  respect to the RMS estimated from the training sample. The overall
62  scale of the volume element is then determined for each event so
63  that the total number of events confined in the volume be within
64  a user-defined range.</li>
65  </ul><p></p>
66  The adaptive range search is used by default.
67  // End_Html
68  */
69 //_______________________________________________________________________
70 
71 #include <assert.h>
72 #include <algorithm>
73 
74 #include "TFile.h"
75 #include "TObjString.h"
76 #include "TMath.h"
77 
78 #include "TMVA/ClassifierFactory.h"
79 #include "TMVA/MethodPDERS.h"
80 #include "TMVA/Tools.h"
81 #include "TMVA/RootFinder.h"
82 
83 #define TMVA_MethodPDERS__countByHand__Debug__
84 #undef TMVA_MethodPDERS__countByHand__Debug__
85 
86 namespace TMVA {
88 };
89 
90 
91 REGISTER_METHOD(PDERS)
92 
93 ClassImp(TMVA::MethodPDERS)
94 
95 ////////////////////////////////////////////////////////////////////////////////
96 /// standard constructor for the PDERS method
97 
98 TMVA::MethodPDERS::MethodPDERS( const TString& jobName,
99  const TString& methodTitle,
100  DataSetInfo& theData,
101  const TString& theOption,
102  TDirectory* theTargetDir ) :
103  MethodBase( jobName, Types::kPDERS, methodTitle, theData, theOption, theTargetDir ),
104  fFcnCall(0),
105  fVRangeMode(kAdaptive),
106  fKernelEstimator(kBox),
107  fDelta(0),
108  fShift(0),
109  fScaleS(0),
110  fScaleB(0),
111  fDeltaFrac(0),
112  fGaussSigma(0),
113  fGaussSigmaNorm(0),
114  fNRegOut(0),
115  fNEventsMin(0),
116  fNEventsMax(0),
117  fMaxVIterations(0),
118  fInitialScale(0),
119  fInitializedVolumeEle(0),
120  fkNNMin(0),
121  fkNNMax(0),
122  fMax_distance(0),
123  fPrinted(0),
124  fNormTree(0)
125 {
126 }
127 
128 ////////////////////////////////////////////////////////////////////////////////
129 /// construct MethodPDERS through from file
130 
132  const TString& theWeightFile,
133  TDirectory* theTargetDir ) :
134  MethodBase( Types::kPDERS, theData, theWeightFile, theTargetDir ),
135  fFcnCall(0),
136  fVRangeMode(kAdaptive),
137  fKernelEstimator(kBox),
138  fDelta(0),
139  fShift(0),
140  fScaleS(0),
141  fScaleB(0),
142  fDeltaFrac(0),
143  fGaussSigma(0),
144  fGaussSigmaNorm(0),
145  fNRegOut(0),
146  fNEventsMin(0),
147  fNEventsMax(0),
148  fMaxVIterations(0),
149  fInitialScale(0),
150  fInitializedVolumeEle(0),
151  fkNNMin(0),
152  fkNNMax(0),
153  fMax_distance(0),
154  fPrinted(0),
155  fNormTree(0)
156 {
157 }
158 
159 ////////////////////////////////////////////////////////////////////////////////
160 /// PDERS can handle classification with 2 classes and regression with one or more regression-targets
161 
163 {
164  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
165  if (type == Types::kRegression) return kTRUE;
166  return kFALSE;
167 }
168 
169 ////////////////////////////////////////////////////////////////////////////////
170 /// default initialisation routine called by all constructors
171 
173 {
174  fBinaryTree = NULL;
175 
176  UpdateThis();
177 
178  // default options
179  fDeltaFrac = 3.0;
180  fVRangeMode = kAdaptive;
181  fKernelEstimator = kBox;
182 
183  // special options for Adaptive mode
184  fNEventsMin = 100;
185  fNEventsMax = 200;
186  fMaxVIterations = 150;
187  fInitialScale = 0.99;
188  fGaussSigma = 0.1;
189  fNormTree = kFALSE;
190 
191  fkNNMin = Int_t(fNEventsMin);
192  fkNNMax = Int_t(fNEventsMax);
193 
194  fInitializedVolumeEle = kFALSE;
195  fAverageRMS.clear();
196 
197  // the minimum requirement to declare an event signal-like
198  SetSignalReferenceCut( 0.0 );
199 }
200 
201 ////////////////////////////////////////////////////////////////////////////////
202 /// destructor
203 
205 {
206  if (fDelta) delete fDelta;
207  if (fShift) delete fShift;
208 
209  if (NULL != fBinaryTree) delete fBinaryTree;
210 }
211 
212 ////////////////////////////////////////////////////////////////////////////////
213 /// define the options (their key words) that can be set in the option string
214 /// know options:
215 /// VolumeRangeMode <string> Method to determine volume range
216 /// available values are: MinMax
217 /// Unscaled
218 /// RMS
219 /// kNN
220 /// Adaptive <default>
221 ///
222 /// KernelEstimator <string> Kernel estimation function
223 /// available values are: Box <default>
224 /// Sphere
225 /// Teepee
226 /// Gauss
227 /// Sinc3
228 /// Sinc5
229 /// Sinc7
230 /// Sinc9
231 /// Sinc11
232 /// Lanczos2
233 /// Lanczos3
234 /// Lanczos5
235 /// Lanczos8
236 /// Trim
237 ///
238 /// DeltaFrac <float> Ratio of #EventsMin/#EventsMax for MinMax and RMS volume range
239 /// NEventsMin <int> Minimum number of events for adaptive volume range
240 /// NEventsMax <int> Maximum number of events for adaptive volume range
241 /// MaxVIterations <int> Maximum number of iterations for adaptive volume range
242 /// InitialScale <float> Initial scale for adaptive volume range
243 /// GaussSigma <float> Width with respect to the volume size of Gaussian kernel estimator
244 
246 {
247  DeclareOptionRef(fVolumeRange="Adaptive", "VolumeRangeMode", "Method to determine volume size");
248  AddPreDefVal(TString("Unscaled"));
249  AddPreDefVal(TString("MinMax"));
250  AddPreDefVal(TString("RMS"));
251  AddPreDefVal(TString("Adaptive"));
252  AddPreDefVal(TString("kNN"));
253 
254  DeclareOptionRef(fKernelString="Box", "KernelEstimator", "Kernel estimation function");
255  AddPreDefVal(TString("Box"));
256  AddPreDefVal(TString("Sphere"));
257  AddPreDefVal(TString("Teepee"));
258  AddPreDefVal(TString("Gauss"));
259  AddPreDefVal(TString("Sinc3"));
260  AddPreDefVal(TString("Sinc5"));
261  AddPreDefVal(TString("Sinc7"));
262  AddPreDefVal(TString("Sinc9"));
263  AddPreDefVal(TString("Sinc11"));
264  AddPreDefVal(TString("Lanczos2"));
265  AddPreDefVal(TString("Lanczos3"));
266  AddPreDefVal(TString("Lanczos5"));
267  AddPreDefVal(TString("Lanczos8"));
268  AddPreDefVal(TString("Trim"));
269 
270  DeclareOptionRef(fDeltaFrac , "DeltaFrac", "nEventsMin/Max for minmax and rms volume range");
271  DeclareOptionRef(fNEventsMin , "NEventsMin", "nEventsMin for adaptive volume range");
272  DeclareOptionRef(fNEventsMax , "NEventsMax", "nEventsMax for adaptive volume range");
273  DeclareOptionRef(fMaxVIterations, "MaxVIterations", "MaxVIterations for adaptive volume range");
274  DeclareOptionRef(fInitialScale , "InitialScale", "InitialScale for adaptive volume range");
275  DeclareOptionRef(fGaussSigma , "GaussSigma", "Width (wrt volume size) of Gaussian kernel estimator");
276  DeclareOptionRef(fNormTree , "NormTree", "Normalize binary search tree");
277 }
278 
279 ////////////////////////////////////////////////////////////////////////////////
280 /// process the options specified by the user
281 
283 {
284  if (IgnoreEventsWithNegWeightsInTraining()) {
285  Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
286  << GetMethodTypeName()
287  << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
288  << Endl;
289  }
290 
291  fGaussSigmaNorm = fGaussSigma; // * TMath::Sqrt( Double_t(GetNvar()) );
292 
293  fVRangeMode = MethodPDERS::kUnsupported;
294 
295  if (fVolumeRange == "MinMax" ) fVRangeMode = kMinMax;
296  else if (fVolumeRange == "RMS" ) fVRangeMode = kRMS;
297  else if (fVolumeRange == "Adaptive" ) fVRangeMode = kAdaptive;
298  else if (fVolumeRange == "Unscaled" ) fVRangeMode = kUnscaled;
299  else if (fVolumeRange == "kNN" ) fVRangeMode = kkNN;
300  else {
301  Log() << kFATAL << "VolumeRangeMode parameter '" << fVolumeRange << "' unknown" << Endl;
302  }
303 
304  if (fKernelString == "Box" ) fKernelEstimator = kBox;
305  else if (fKernelString == "Sphere" ) fKernelEstimator = kSphere;
306  else if (fKernelString == "Teepee" ) fKernelEstimator = kTeepee;
307  else if (fKernelString == "Gauss" ) fKernelEstimator = kGauss;
308  else if (fKernelString == "Sinc3" ) fKernelEstimator = kSinc3;
309  else if (fKernelString == "Sinc5" ) fKernelEstimator = kSinc5;
310  else if (fKernelString == "Sinc7" ) fKernelEstimator = kSinc7;
311  else if (fKernelString == "Sinc9" ) fKernelEstimator = kSinc9;
312  else if (fKernelString == "Sinc11" ) fKernelEstimator = kSinc11;
313  else if (fKernelString == "Lanczos2" ) fKernelEstimator = kLanczos2;
314  else if (fKernelString == "Lanczos3" ) fKernelEstimator = kLanczos3;
315  else if (fKernelString == "Lanczos5" ) fKernelEstimator = kLanczos5;
316  else if (fKernelString == "Lanczos8" ) fKernelEstimator = kLanczos8;
317  else if (fKernelString == "Trim" ) fKernelEstimator = kTrim;
318  else {
319  Log() << kFATAL << "KernelEstimator parameter '" << fKernelString << "' unknown" << Endl;
320  }
321 
322  // TODO: Add parameter validation
323 
324  Log() << kVERBOSE << "interpreted option string: vRangeMethod: '"
325  << (const char*)((fVRangeMode == kMinMax) ? "MinMax" :
326  (fVRangeMode == kUnscaled) ? "Unscaled" :
327  (fVRangeMode == kRMS ) ? "RMS" : "Adaptive") << "'" << Endl;
328  if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
329  Log() << kVERBOSE << "deltaFrac: " << fDeltaFrac << Endl;
330  else
331  Log() << kVERBOSE << "nEventsMin/Max, maxVIterations, initialScale: "
332  << fNEventsMin << " " << fNEventsMax
333  << " " << fMaxVIterations << " " << fInitialScale << Endl;
334  Log() << kVERBOSE << "KernelEstimator = " << fKernelString << Endl;
335 }
336 
337 ////////////////////////////////////////////////////////////////////////////////
338 /// this is a dummy training: the preparation work to do is the construction
339 /// of the binary tree as a pointer chain. It is easier to directly save the
340 /// trainingTree in the weight file, and to rebuild the binary tree in the
341 /// test phase from scratch
342 
344 {
345  if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with PDERS; "
346  << "please remove the option from the configuration string, or "
347  << "use \"!Normalise\""
348  << Endl;
349 
350  CreateBinarySearchTree( Types::kTraining );
351 
352  CalcAverages();
353  SetVolumeElement();
354 
355  fInitializedVolumeEle = kTRUE;
356 }
357 
358 ////////////////////////////////////////////////////////////////////////////////
359 /// init the size of a volume element using a defined fraction of the
360 /// volume containing the entire events
361 
363 {
364  if (fInitializedVolumeEle == kFALSE) {
365  fInitializedVolumeEle = kTRUE;
366 
367  // binary trees must exist
368  assert( fBinaryTree );
369 
370  CalcAverages();
371  SetVolumeElement();
372  }
373 
374  // cannot determine error
375  NoErrorCalc(err, errUpper);
376 
377  return this->CRScalc( *GetEvent() );
378 }
379 
380 ////////////////////////////////////////////////////////////////////////////////
381 
382 const std::vector< Float_t >& TMVA::MethodPDERS::GetRegressionValues()
383 {
384  if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>;
385  fRegressionReturnVal->clear();
386  // init the size of a volume element using a defined fraction of the
387  // volume containing the entire events
388  if (fInitializedVolumeEle == kFALSE) {
389  fInitializedVolumeEle = kTRUE;
390 
391  // binary trees must exist
392  assert( fBinaryTree );
393 
394  CalcAverages();
395 
396  SetVolumeElement();
397  }
398 
399  const Event* ev = GetEvent();
400  this->RRScalc( *ev, fRegressionReturnVal );
401 
402  Event * evT = new Event(*ev);
403  UInt_t ivar = 0;
404  for (std::vector<Float_t>::iterator it = fRegressionReturnVal->begin(); it != fRegressionReturnVal->end(); it++ ) {
405  evT->SetTarget(ivar,(*it));
406  ivar++;
407  }
408 
409  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
410  fRegressionReturnVal->clear();
411 
412  for (ivar = 0; ivar<evT2->GetNTargets(); ivar++) {
413  fRegressionReturnVal->push_back(evT2->GetTarget(ivar));
414  }
415 
416  delete evT;
417 
418 
419  return (*fRegressionReturnVal);
420 }
421 
422 ////////////////////////////////////////////////////////////////////////////////
423 /// compute also average RMS values required for adaptive Gaussian
424 
426 {
427  if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN ) {
428  fAverageRMS.clear();
429  fBinaryTree->CalcStatistics();
430 
431  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
432  if (!DoRegression()){ //why there are separate rms for signal and background?
433  Float_t rmsS = fBinaryTree->RMS(Types::kSignal, ivar);
434  Float_t rmsB = fBinaryTree->RMS(Types::kBackground, ivar);
435  fAverageRMS.push_back( (rmsS + rmsB)*0.5 );
436  } else {
437  Float_t rms = fBinaryTree->RMS( ivar );
438  fAverageRMS.push_back( rms );
439  }
440  }
441  }
442 }
443 
444 ////////////////////////////////////////////////////////////////////////////////
445 /// create binary search trees for signal and background
446 
448 {
449  if (NULL != fBinaryTree) delete fBinaryTree;
450  fBinaryTree = new BinarySearchTree();
451  if (fNormTree) {
452  fBinaryTree->SetNormalize( kTRUE );
453  }
454 
455  fBinaryTree->Fill( GetEventCollection(type) );
456 
457  if (fNormTree) {
458  fBinaryTree->NormalizeTree();
459  }
460 
461  if (!DoRegression()) {
462  // these are the signal and background scales for the weights
463  fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
464  fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
465 
466  Log() << kVERBOSE << "Signal and background scales: " << fScaleS << " " << fScaleB << Endl;
467  }
468 }
469 
470 ////////////////////////////////////////////////////////////////////////////////
471 /// defines volume dimensions
472 
474  if (GetNvar()==0) {
475  Log() << kFATAL << "GetNvar() == 0" << Endl;
476  return;
477  }
478 
479  // init relative scales
480  fkNNMin = Int_t(fNEventsMin);
481  fkNNMax = Int_t(fNEventsMax);
482 
483  if (fDelta) delete fDelta;
484  if (fShift) delete fShift;
485  fDelta = new std::vector<Float_t>( GetNvar() );
486  fShift = new std::vector<Float_t>( GetNvar() );
487 
488  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
489  switch (fVRangeMode) {
490 
491  case kRMS:
492  case kkNN:
493  case kAdaptive:
494  // sanity check
495  if (fAverageRMS.size() != GetNvar())
496  Log() << kFATAL << "<SetVolumeElement> RMS not computed: " << fAverageRMS.size() << Endl;
497  (*fDelta)[ivar] = fAverageRMS[ivar]*fDeltaFrac;
498  Log() << kVERBOSE << "delta of var[" << (*fInputVars)[ivar]
499  << "\t]: " << fAverageRMS[ivar]
500  << "\t | comp with |max - min|: " << (GetXmax( ivar ) - GetXmin( ivar ))
501  << Endl;
502  break;
503  case kMinMax:
504  (*fDelta)[ivar] = (GetXmax( ivar ) - GetXmin( ivar ))*fDeltaFrac;
505  break;
506  case kUnscaled:
507  (*fDelta)[ivar] = fDeltaFrac;
508  break;
509  default:
510  Log() << kFATAL << "<SetVolumeElement> unknown range-set mode: "
511  << fVRangeMode << Endl;
512  }
513  (*fShift)[ivar] = 0.5; // volume is centered around test value
514  }
515 
516 }
517 
518 ////////////////////////////////////////////////////////////////////////////////
519 /// Interface to RootFinder
520 
522 {
523  return ThisPDERS()->GetVolumeContentForRoot( scale );
524 }
525 
526 ////////////////////////////////////////////////////////////////////////////////
527 /// count number of events in rescaled volume
528 
530 {
531  Volume v( *fHelpVolume );
532  v.ScaleInterval( scale );
533 
534  Double_t count = GetBinaryTree()->SearchVolume( &v );
535 
536  v.Delete();
537  return count;
538 }
539 
541  std::vector<const BinarySearchTreeNode*>& events,
542  Volume *volume )
543 {
544  Float_t count = 0;
545 
546  // -------------------------------------------------------------------------
547  //
548  // ==== test of volume search =====
549  //
550  // #define TMVA::MethodPDERS__countByHand__Debug__
551 
552 #ifdef TMVA_MethodPDERS__countByHand__Debug__
553 
554  // starting values
555  count = fBinaryTree->SearchVolume( volume );
556 
557  Int_t iS = 0, iB = 0;
558  UInt_t nvar = GetNvar();
559  for (UInt_t ievt_=0; ievt_<Data()->GetNTrainingEvents(); ievt_++) {
560  const Event * ev = GetTrainingEvent(ievt_);
561  Bool_t inV;
562  for (Int_t ivar=0; ivar<nvar; ivar++) {
563  Float_t x = ev->GetValue(ivar);
564  inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
565  if (!inV) break;
566  }
567  if (inV) {
568  in++;
569  }
570  }
571  Log() << kVERBOSE << "debug: my test: " << in << Endl;// <- ***********tree
572  Log() << kVERBOSE << "debug: binTree: " << count << Endl << Endl;// <- ***********tree
573 
574 #endif
575 
576  // -------------------------------------------------------------------------
577 
578  if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled) { // Constant volume
579 
580  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
581  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
582  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
583  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
584  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
585  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
586  }
587  Volume* svolume = new Volume( lb, ub );
588  // starting values
589 
590  fBinaryTree->SearchVolume( svolume, &events );
591  }
592  else if (fVRangeMode == kAdaptive) { // adaptive volume
593 
594  // -----------------------------------------------------------------------
595 
596  // TODO: optimize, perhaps multi stage with broadening limits,
597  // or a different root finding method entirely,
599 
600  // that won't need to search through large volume, where the bottle neck probably is
601 
602  fHelpVolume = volume;
603 
604  UpdateThis(); // necessary update of static pointer
605  RootFinder rootFinder( &IGetVolumeContentForRoot, 0.01, 50, 200, 10 );
606  Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
607 
608  volume->ScaleInterval( scale );
609 
610  fBinaryTree->SearchVolume( volume, &events );
611 
612  fHelpVolume = NULL;
613  }
614  // -----------------------------------------------------------------------
615  else {
616 
617  // starting values
618  count = fBinaryTree->SearchVolume( volume );
619 
620  Float_t nEventsO = count;
621  Int_t i_=0;
622 
623  while (nEventsO < fNEventsMin) { // this isn't a sain start... try again
624  volume->ScaleInterval( 1.15 );
625  count = fBinaryTree->SearchVolume( volume );
626  nEventsO = count;
627  i_++;
628  }
629  if (i_ > 50) Log() << kWARNING << "warning in event: " << e
630  << ": adaptive volume pre-adjustment reached "
631  << ">50 iterations in while loop (" << i_ << ")" << Endl;
632 
633  Float_t nEventsN = nEventsO;
634  Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax);
635  Float_t scaleO = 1.0;
636  Float_t scaleN = fInitialScale;
637  Float_t scale = scaleN;
638  Float_t scaleBest = scaleN;
639  Float_t nEventsBest = nEventsN;
640 
641  for (Int_t ic=1; ic<fMaxVIterations; ic++) {
642  if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
643 
644  // search for events in rescaled volume
645  Volume* v = new Volume( *volume );
646  v->ScaleInterval( scale );
647  nEventsN = fBinaryTree->SearchVolume( v );
648 
649  // determine next iteration (linear approximation)
650  if (nEventsN > 1 && nEventsN - nEventsO != 0)
651  if (scaleN - scaleO != 0)
652  scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
653  else
654  scale += (-0.01); // should not actually occur...
655  else
656  scale += 0.5; // use much larger volume
657 
658  // save old scale
659  scaleN = scale;
660 
661  // take if better (don't accept it if too small number of events)
662  if (TMath::Abs(nEventsN - nEventsE) < TMath::Abs(nEventsBest - nEventsE) &&
663  (nEventsN >= fNEventsMin || nEventsBest < nEventsN)) {
664  nEventsBest = nEventsN;
665  scaleBest = scale;
666  }
667 
668  v->Delete();
669  delete v;
670  }
671  else break;
672  }
673 
674  // last sanity check
675  nEventsN = nEventsBest;
676  // include "1" to cover float precision
677  if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
678  Log() << kWARNING << "warning in event " << e
679  << ": adaptive volume adjustment reached "
680  << "max. #iterations (" << fMaxVIterations << ")"
681  << "[ nEvents: " << nEventsN << " " << fNEventsMin << " " << fNEventsMax << "]"
682  << Endl;
683 
684  volume->ScaleInterval( scaleBest );
685  fBinaryTree->SearchVolume( volume, &events );
686  }
687 
688  // end of adaptive method
689 
690  } else if (fVRangeMode == kkNN) {
691  Volume v(*volume);
692 
693  events.clear();
694  // check number of signals in begining volume
695  Int_t kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );
696  //if this number is too large return fkNNMax+1
697 
698  Int_t t_times = 0; // number of iterations
699 
700  while ( !(kNNcount >= fkNNMin && kNNcount <= fkNNMax) ) {
701  if (kNNcount < fkNNMin) { //if we have too less points
702  Float_t scale = 2; //better scale needed
703  volume->ScaleInterval( scale );
704  }
705  else if (kNNcount > fkNNMax) { //if we have too many points
706  Float_t scale = 0.1; //better scale needed
707  volume->ScaleInterval( scale );
708  }
709  events.clear();
710 
711  v = *volume ;
712 
713  kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 ); //new search
714 
715  t_times++;
716 
717  if (t_times == fMaxVIterations) {
718  Log() << kWARNING << "warining in event" << e
719  << ": kNN volume adjustment reached "
720  << "max. #iterations (" << fMaxVIterations << ")"
721  << "[ kNN: " << fkNNMin << " " << fkNNMax << Endl;
722  break;
723  }
724  }
725 
726  //vector to normalize distance in each dimension
727  Double_t *dim_normalization = new Double_t[GetNvar()];
728  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
729  dim_normalization [ivar] = 1.0 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
730  }
731 
732  std::vector<const BinarySearchTreeNode*> tempVector; // temporary vector for events
733 
734  if (kNNcount >= fkNNMin) {
735  std::vector<Double_t> *distances = new std::vector<Double_t>( kNNcount );
736 
737  //counting the distance for each event
738  for (Int_t j=0;j< Int_t(events.size()) ;j++)
739  (*distances)[j] = GetNormalizedDistance ( e, *events[j], dim_normalization );
740 
741  //counting the fkNNMin-th element
742  std::vector<Double_t>::iterator wsk = distances->begin();
743  for (Int_t j=0;j<fkNNMin-1;j++) wsk++;
744  std::nth_element( distances->begin(), wsk, distances->end() );
745 
746  //getting all elements that are closer than fkNNMin-th element
747  //signals
748  for (Int_t j=0;j<Int_t(events.size());j++) {
749  Double_t dist = GetNormalizedDistance( e, *events[j], dim_normalization );
750 
751  if (dist <= (*distances)[fkNNMin-1])
752  tempVector.push_back( events[j] );
753  }
754  fMax_distance = (*distances)[fkNNMin-1];
755  delete distances;
756  }
757  delete[] dim_normalization;
758  events = tempVector;
759 
760  } else {
761 
762  // troubles ahead...
763  Log() << kFATAL << "<GetSample> unknown RangeMode: " << fVRangeMode << Endl;
764  }
765  // -----------------------------------------------------------------------
766 }
767 
768 ////////////////////////////////////////////////////////////////////////////////
769 
771 {
772  std::vector<const BinarySearchTreeNode*> events;
773 
774  // computes event weight by counting number of signal and background
775  // events (of reference sample) that are found within given volume
776  // defined by the event
777  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
778  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
779 
780  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
781  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
782  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
783  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
784  }
785 
786  Volume *volume = new Volume( lb, ub );
787 
788  GetSample( e, events, volume );
789  Double_t count = CKernelEstimate( e, events, *volume );
790  delete volume;
791  delete lb;
792  delete ub;
793 
794  return count;
795 }
796 
797 ////////////////////////////////////////////////////////////////////////////////
798 
799 void TMVA::MethodPDERS::RRScalc( const Event& e, std::vector<Float_t>* count )
800 {
801  std::vector<const BinarySearchTreeNode*> events;
802 
803  // computes event weight by counting number of signal and background
804  // events (of reference sample) that are found within given volume
805  // defined by the event
806  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
807  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
808 
809  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
810  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
811  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
812  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
813  }
814  Volume *volume = new Volume( lb, ub );
815 
816  GetSample( e, events, volume );
817  RKernelEstimate( e, events, *volume, count );
818 
819  delete volume;
820 
821  return;
822 }
823 ////////////////////////////////////////////////////////////////////////////////
824 /// normalization factors so we can work with radius 1 hyperspheres
825 
827  std::vector<const BinarySearchTreeNode*>& events, Volume& v )
828 {
829  Double_t *dim_normalization = new Double_t[GetNvar()];
830  for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
831  dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
832 
833  Double_t pdfSumS = 0;
834  Double_t pdfSumB = 0;
835 
836  // Iteration over sample points
837  for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); iev++) {
838 
839  // First switch to the one dimensional distance
840  Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
841 
842  // always working within the hyperelipsoid, except for when we don't
843  // note that rejection ratio goes to 1 as nvar goes to infinity
844  if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
845 
846  if ( (*iev)->GetClass()==fSignalClass )
847  pdfSumS += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
848  else
849  pdfSumB += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
850  }
851  pdfSumS = KernelNormalization( pdfSumS < 0. ? 0. : pdfSumS );
852  pdfSumB = KernelNormalization( pdfSumB < 0. ? 0. : pdfSumB );
853 
854  delete[] dim_normalization;
855 
856  if (pdfSumS < 1e-20 && pdfSumB < 1e-20) return 0.5;
857  if (pdfSumB < 1e-20) return 1.0;
858  if (pdfSumS < 1e-20) return 0.0;
859 
860  Float_t r = pdfSumB*fScaleB/(pdfSumS*fScaleS);
861  return 1.0/(r + 1.0); // TODO: propagate errors from here
862 }
863 
864 ////////////////////////////////////////////////////////////////////////////////
865 /// normalization factors so we can work with radius 1 hyperspheres
866 
868  std::vector<const BinarySearchTreeNode*>& events, Volume& v,
869  std::vector<Float_t>* pdfSum )
870 {
871  Double_t *dim_normalization = new Double_t[GetNvar()];
872  for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
873  dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
874 
875  // std::vector<Float_t> pdfSum;
876  pdfSum->clear();
877  Float_t pdfDiv = 0;
878  fNRegOut = 1; // for now, regression is just for 1 dimension
879 
880  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
881  pdfSum->push_back( 0 );
882 
883  // Iteration over sample points
884  for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); iev++) {
885 
886  // First switch to the one dimensional distance
887  Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
888 
889  // always working within the hyperelipsoid, except for when we don't
890  // note that rejection ratio goes to 1 as nvar goes to infinity
891  if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
892 
893  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++) {
894  pdfSum->at(ivar) += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight() * (*iev)->GetTargets()[ivar];
895  pdfDiv += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
896  }
897  }
898 
899  delete[] dim_normalization;
900 
901  if (pdfDiv == 0)
902  return;
903 
904  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
905  pdfSum->at(ivar) /= pdfDiv;
906 
907  return;
908 }
909 
910 ////////////////////////////////////////////////////////////////////////////////
911 /// from the normalized euclidean distance calculate the distance
912 /// for a certain kernel
913 
915 {
916  switch (fKernelEstimator) {
917  case kBox:
918  case kSphere:
919  return 1;
920  break;
921  case kTeepee:
922  return (1 - normalized_distance);
923  break;
924  case kGauss:
925  return TMath::Gaus( normalized_distance, 0, fGaussSigmaNorm, kFALSE);
926  break;
927  case kSinc3:
928  case kSinc5:
929  case kSinc7:
930  case kSinc9:
931  case kSinc11: {
932  Double_t side_crossings = 2 + ((int) fKernelEstimator) - ((int) kSinc3);
933  return NormSinc (side_crossings * normalized_distance);
934  }
935  break;
936  case kLanczos2:
937  return LanczosFilter (2, normalized_distance);
938  break;
939  case kLanczos3:
940  return LanczosFilter (3, normalized_distance);
941  break;
942  case kLanczos5:
943  return LanczosFilter (5, normalized_distance);
944  break;
945  case kLanczos8:
946  return LanczosFilter (8, normalized_distance);
947  break;
948  case kTrim: {
949  Double_t x = normalized_distance / fMax_distance;
950  x = 1 - x*x*x;
951  return x*x*x;
952  }
953  break;
954  default:
955  Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
956  break;
957  }
958 
959  return 0;
960 }
961 
962 ////////////////////////////////////////////////////////////////////////////////
963 /// Calculating the normalization factor only once (might need a reset at some point.
964 /// Can the method be restarted with different params?)
965 
{
   // Caching jammed to disable function.
   // It's not really useful after all, badly implemented and untested :-)
   // NOTE(review): ret starts at 1.0 and the test below returns whenever
   // ret != 0.0, so the early return always fires and everything past it is
   // deliberately dead code, kept for reference.
   TTHREAD_TLS(Double_t) ret = 1.0;

   if (ret != 0.0) return ret*pdf;

   // We first normalize by the volume of the hypersphere.
   switch (fKernelEstimator) {
   case kBox:
   case kSphere:
      ret = 1.;
      break;
   case kTeepee:
      ret = (GetNvar() * (GetNvar() + 1) * TMath::Gamma (((Double_t) GetNvar()) / 2.)) /
         ( TMath::Power (2., (Double_t) GetNvar() + 1) * TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.));
      break;
   case kGauss:
      // We use full range integral here. Reasonable because of the fast function decay.
      ret = 1. / TMath::Power ( 2 * TMath::Pi() * fGaussSigmaNorm * fGaussSigmaNorm, ((Double_t) GetNvar()) / 2.);
      break;
   case kSinc3:
   case kSinc5:
   case kSinc7:
   case kSinc9:
   case kSinc11:
   case kLanczos2:
   case kLanczos3:
   case kLanczos5:
   case kLanczos8:
      // We use the full range integral here. Reasonable because the central lobe dominates it.
      ret = 1 / TMath::Power ( 2., (Double_t) GetNvar() );
      break;
   default:
      // NOTE(review): kTrim has no explicit normalization and would hit this
      // kFATAL branch -- unreachable in practice since the function is disabled.
      Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
   }

   // Normalizing by the full volume
   ret *= ( TMath::Power (2., static_cast<Int_t>(GetNvar())) * TMath::Gamma (1 + (((Double_t) GetNvar()) / 2.)) ) /
      TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.);

   return ret*pdf;
}
1010 
1011 ////////////////////////////////////////////////////////////////////////////////
1012 /// We use Euclidian metric here. Might not be best or most efficient.
1013 
1015  const BinarySearchTreeNode &sample_event,
1016  Double_t *dim_normalization)
1017 {
1018  Double_t ret=0;
1019  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1020  Double_t dist = dim_normalization[ivar] * (sample_event.GetEventV()[ivar] - base_event.GetValue(ivar));
1021  ret += dist*dist;
1022  }
1023  // DD 26.11.2008: bugfix: division by (GetNvar()) was missing for correct normalisation
1024  ret /= GetNvar();
1025  return TMath::Sqrt (ret);
1026 }
1027 
1028 ////////////////////////////////////////////////////////////////////////////////
1029 /// NormSinc
1030 
1032 {
1033  if (x < 10e-10 && x > -10e-10) {
1034  return 1; // Poor man's l'Hopital
1035  }
1036 
1037  Double_t pix = TMath::Pi() * x;
1038  Double_t sinc = TMath::Sin(pix) / pix;
1039  Double_t ret;
1040 
1041  if (GetNvar() % 2)
1042  ret = TMath::Power (sinc, static_cast<Int_t>(GetNvar()));
1043  else
1044  ret = TMath::Abs (sinc) * TMath::Power (sinc, static_cast<Int_t>(GetNvar() - 1));
1045 
1046  return ret;
1047 }
1048 
1049 ////////////////////////////////////////////////////////////////////////////////
1050 /// Lanczos Filter
1051 
1053 {
1054  if (x < 10e-10 && x > -10e-10) {
1055  return 1; // Poor man's l'Hopital
1056  }
1057 
1058  Double_t pix = TMath::Pi() * x;
1059  Double_t pixtimesn = pix * ((Double_t) level);
1060  Double_t lanczos = (TMath::Sin(pix) / pix) * (TMath::Sin(pixtimesn) / pixtimesn);
1061  Double_t ret;
1062 
1063  if (GetNvar() % 2) ret = TMath::Power (lanczos, static_cast<Int_t>(GetNvar()));
1064  else ret = TMath::Abs (lanczos) * TMath::Power (lanczos, static_cast<Int_t>(GetNvar() - 1));
1065 
1066  return ret;
1067 }
1068 
1069 ////////////////////////////////////////////////////////////////////////////////
1070 /// statistical error estimate for RS estimator
1071 
1073  Float_t sumW2S, Float_t sumW2B ) const
1074 {
1075  Float_t c = fScaleB/fScaleS;
1076  Float_t d = countS + c*countB; d *= d;
1077 
1078  if (d < 1e-10) return 1; // Error is zero because of B = S = 0
1079 
1080  Float_t f = c*c/d/d;
1081  Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;
1082 
1083  if (err < 1e-10) return 1; // Error is zero because of B or S = 0
1084 
1085  return sqrt(err);
1086 }
1087 
1088 ////////////////////////////////////////////////////////////////////////////////
1089 /// write weights to xml file
1090 
1091 void TMVA::MethodPDERS::AddWeightsXMLTo( void* parent ) const
1092 {
1093  void* wght = gTools().AddChild(parent, "Weights");
1094  if (fBinaryTree)
1095  fBinaryTree->AddXMLTo(wght);
1096  else
1097  Log() << kFATAL << "Signal and background binary search tree not available" << Endl;
1098  //Log() << kFATAL << "Please implement writing of weights as XML" << Endl;
1099 }
1100 
1101 ////////////////////////////////////////////////////////////////////////////////
1102 
1104 {
1105  if (NULL != fBinaryTree) delete fBinaryTree;
1106  void* treenode = gTools().GetChild(wghtnode);
1107  fBinaryTree = TMVA::BinarySearchTree::CreateFromXML(treenode);
1108  if(!fBinaryTree)
1109  Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1110  if(!fBinaryTree)
1111  Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1112  fBinaryTree->SetPeriode( GetNvar() );
1113  fBinaryTree->CalcStatistics();
1114  fBinaryTree->CountNodes();
1115  if (fBinaryTree->GetSumOfWeights( Types::kSignal ) > 0)
1116  fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1117  else fScaleS = 1;
1118  if (fBinaryTree->GetSumOfWeights( Types::kBackground ) > 0)
1119  fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1120  else fScaleB = 1;
1121  Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1122  CalcAverages();
1123  SetVolumeElement();
1124  fInitializedVolumeEle = kTRUE;
1125 }
1126 
1127 ////////////////////////////////////////////////////////////////////////////////
1128 /// read weight info from file
1129 
1131 {
1132  if (NULL != fBinaryTree) delete fBinaryTree;
1133 
1134  fBinaryTree = new BinarySearchTree();
1135 
1136  istr >> *fBinaryTree;
1137 
1138  fBinaryTree->SetPeriode( GetNvar() );
1139 
1140  fBinaryTree->CalcStatistics();
1141 
1142  fBinaryTree->CountNodes();
1143 
1144  // these are the signal and background scales for the weights
1145  fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1146  fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1147 
1148  Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1149 
1150  CalcAverages();
1151 
1152  SetVolumeElement();
1153 
1154  fInitializedVolumeEle = kTRUE;
1155 }
1156 
1157 ////////////////////////////////////////////////////////////////////////////////
1158 /// write training sample (TTree) to file
1159 
1161 {
1162 }
1163 
1164 ////////////////////////////////////////////////////////////////////////////////
1165 /// read training sample from file
1166 
1168 {
1169 }
1170 
1171 ////////////////////////////////////////////////////////////////////////////////
1172 /// static pointer to this object
1173 
1175 {
1176  return GetMethodPDERSThreadLocal();
1177 }
1178 ////////////////////////////////////////////////////////////////////////////////
1179 /// update static this pointer
1180 
1182 {
1183  GetMethodPDERSThreadLocal() = this;
1184 }
1185 
1186 ////////////////////////////////////////////////////////////////////////////////
1187 /// write specific classifier response
1188 
void TMVA::MethodPDERS::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   // Emit only a placeholder comment and close the class body: no standalone
   // C++ response class is generated for PDERS.
   fout << " // not implemented for class: \"" << className << "\"" << std::endl;
   fout << "};" << std::endl;
}
1194 
1195 ////////////////////////////////////////////////////////////////////////////////
1196 /// get help message text
1197 ///
1198 /// typical length of text line:
1199 /// "|--------------------------------------------------------------|"
1200 
{
   // Print the user-facing help text in three sections: short description,
   // performance optimisation, and tuning via configuration options.
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "PDERS is a generalization of the projective likelihood classifier " << Endl;
   Log() << "to N dimensions, where N is the number of input variables used." << Endl;
   Log() << "In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" << Endl;
   Log() << "(k-NN) methods. If the multidimensional PDF for signal and background" << Endl;
   Log() << "were known, this classifier would exploit the full information" << Endl;
   Log() << "contained in the input variables, and would hence be optimal. In " << Endl;
   Log() << "practice however, huge training samples are necessary to sufficiently " << Endl;
   Log() << "populate the multidimensional phase space. " << Endl;
   Log() << Endl;
   Log() << "The simplest implementation of PDERS counts the number of signal" << Endl;
   Log() << "and background events in the vicinity of a test event, and returns" << Endl;
   Log() << "a weight according to the majority species of the neighboring events." << Endl;
   Log() << "A more involved version of PDERS (selected by the option \"KernelEstimator\")" << Endl;
   Log() << "uses Kernel estimation methods to approximate the shape of the PDF." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "PDERS can be very powerful in case of strongly non-linear problems, " << Endl;
   Log() << "e.g., distinct islands of signal and background regions. Because of " << Endl;
   Log() << "the exponential growth of the phase space, it is important to restrict" << Endl;
   Log() << "the number of input variables (dimension) to the strictly necessary." << Endl;
   Log() << Endl;
   Log() << "Note that PDERS is a slowly responding classifier. Moreover, the necessity" << Endl;
   Log() << "to store the entire binary tree in memory, to avoid accessing virtual " << Endl;
   Log() << "memory, limits the number of training events that can effectively be " << Endl;
   Log() << "used to model the multidimensional PDF." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "If the PDERS response is found too slow when using the adaptive volume " << Endl;
   Log() << "size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" << Endl;
   Log() << "to reduce the number of events required in the volume, and/or to enlarge" << Endl;
   Log() << "the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" << Endl;
   Log() << "to the width (\"GaussSigma\") of the Gaussian kernel (if used)." << Endl;
}
std::vector< Double_t > * fLower
Definition: Volume.h:75
void Delete(void)
Definition: Volume.cxx:128
void UpdateThis()
update static this pointer
double dist(Rotation3D const &r1, Rotation3D const &r2)
Definition: 3DDistances.cxx:48
virtual ~MethodPDERS(void)
destructor
Definition: Buttons.h:29
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
Double_t GetNormalizedDistance(const TMVA::Event &base_event, const BinarySearchTreeNode &sample_event, Double_t *dim_normalization)
We use Euclidian metric here. Might not be best or most efficient.
const Bool_t MethodPDERS_UseFindRoot
Definition: MethodPDERS.cxx:87
float Float_t
Definition: RtypesCore.h:53
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:314
#define assert(cond)
Definition: unittest.h:542
Double_t NormSinc(Double_t x)
NormSinc.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:45
EAnalysisType
Definition: Types.h:124
void CreateBinarySearchTree(Types::ETreeType type)
create binary search trees for signal and background
std::vector< Double_t > * fUpper
Definition: Volume.h:76
Double_t CKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &)
normalization factors so we can work with radius 1 hyperspheres
Basic string class.
Definition: TString.h:137
void ScaleInterval(Double_t f)
Definition: Volume.cxx:142
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
init the size of a volume element using a defined fraction of the volume containing the entire events...
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Definition: TMath.cxx:352
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:501
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:231
void Init(void)
default initialisation routine called by all constructors
static Double_t IGetVolumeContentForRoot(Double_t)
Interface to RootFinder.
double sqrt(double)
Tools & gTools()
Definition: Tools.cxx:79
void RKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &, std::vector< Float_t > *pdfSum)
normalization factors so we can work with radius 1 hyperspheres
Double_t x[n]
Definition: legend1.C:17
void ReadWeightsFromStream(std::istream &istr)
read weight info from file
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
std::vector< std::vector< double > > Data
void GetHelpMessage() const
get help message text
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Definition: RootFinder.cxx:63
void CalcAverages()
compute also average RMS values required for adaptive Gaussian
Double_t CRScalc(const Event &)
static BinarySearchTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML
void DeclareOptions()
define the options (their key words) that can be set in the option string know options: VolumeRangeMo...
ROOT::R::TRInterface & r
Definition: Object.C:4
void SetVolumeElement(void)
defines volume dimensions
void WriteWeightsToStream(TFile &rf) const
write training sample (TTree) to file
SVector< double, 2 > v
Definition: Dict.h:5
unsigned int UInt_t
Definition: RtypesCore.h:42
ClassImp(TMVA::MethodPDERS) TMVA
standard constructor for the PDERS method
Definition: MethodPDERS.cxx:93
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:354
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:453
Double_t Pi()
Definition: TMath.h:44
void ReadWeightsFromXML(void *wghtnode)
static MethodPDERS * ThisPDERS(void)
static pointer to this object
void Train(void)
this is a dummy training: the preparation work to do is the construction of the binary tree as a poin...
double f(double x)
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:41
int type
Definition: TGX11.cxx:120
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
PDERS can handle classification with 2 classes and regression with one or more regression-targets.
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:101
const std::vector< Float_t > & GetRegressionValues()
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
void AddWeightsXMLTo(void *parent) const
write weights to xml file
Double_t Sin(Double_t)
Definition: TMath.h:421
void ProcessOptions()
process the options specified by the user
const std::vector< Float_t > & GetEventV() const
#define NULL
Definition: Rtypes.h:82
void GetSample(const Event &e, std::vector< const BinarySearchTreeNode * > &events, Volume *volume)
Double_t GetVolumeContentForRoot(Double_t)
count number of events in rescaled volume
MethodPDERS(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption, TDirectory *theTargetDir=0)
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
Double_t ApplyKernelFunction(Double_t normalized_distance)
from the normalized euclidean distance calculate the distance for a certain kernel ...
const Bool_t kTRUE
Definition: Rtypes.h:91
Double_t LanczosFilter(Int_t level, Double_t x)
Lanczos Filter.
void RRScalc(const Event &, std::vector< Float_t > *count)
Definition: math.cpp:60
Float_t GetError(Float_t countS, Float_t countB, Float_t sumW2S, Float_t sumW2B) const
statistical error estimate for RS estimator
Double_t KernelNormalization(Double_t pdf)
Calculating the normalization factor only once (might need a reset at some point. ...