Logo ROOT   6.08/07
Reference Guide
MethodPDEFoam.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
6  * Package: TMVA *
7  * Class : MethodPDEFoam *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Tancredi Carli - CERN, Switzerland *
15  * Dominik Dannheim - CERN, Switzerland *
16  * Alexander Voigt - TU Dresden, Germany *
17  * Peter Speckmayer - CERN, Switzerland *
18  * *
19  * Original author of the TFoam implementation: *
20  * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
21  * *
22  * Copyright (c) 2008, 2010: *
23  * CERN, Switzerland *
24  * MPI-K Heidelberg, Germany *
25  * *
26  * Redistribution and use in source and binary forms, with or without *
27  * modification, are permitted according to the terms listed in LICENSE *
28  * (http://tmva.sourceforge.net/LICENSE) *
29  **********************************************************************************/
30 
31 //_______________________________________________________________________
32 //
33 // MethodPDEFoam
34 //
35 // The PDEFoam method is an extension of the PDERS method, which
36 // divides the multi-dimensional phase space in a finite number of
37 // hyper-rectangles (cells) of constant event density. This "foam" of
38 // cells is filled with averaged probability-density information
39 // sampled from a training event sample.
40 //
41 // For a given number of cells, the binning algorithm adjusts the size
42 // and position of the cells inside the multidimensional phase space
43 // based on a binary-split algorithm, minimizing the variance of the
44 // event density in the cell.
45 // The binned event density information of the final foam is stored in
46 // binary trees, allowing for a fast and memory-efficient
47 // classification of events.
48 //
49 // The implementation of PDEFoam is based on the Monte-Carlo
50 // integration package TFoam included in the analysis package ROOT.
51 // _______________________________________________________________________
52 
53 #include "TMVA/MethodPDEFoam.h"
54 
55 #include "TMVA/ClassifierFactory.h"
56 #include "TMVA/Config.h"
57 #include "TMVA/Configurable.h"
58 #include "TMVA/CrossEntropy.h"
59 #include "TMVA/DataSet.h"
60 #include "TMVA/DataSetInfo.h"
61 #include "TMVA/Event.h"
62 #include "TMVA/GiniIndex.h"
64 #include "TMVA/IMethod.h"
66 #include "TMVA/MethodBase.h"
67 #include "TMVA/MsgLogger.h"
68 #include "TMVA/Ranking.h"
69 #include "TMVA/SdivSqrtSplusB.h"
70 #include "TMVA/SeparationBase.h"
71 #include "TMVA/Tools.h"
72 #include "TMVA/Types.h"
73 #include "TMVA/VariableInfo.h"
74 
75 #include "TMath.h"
76 #include "TH1F.h"
77 #include "TFile.h"
78 
79 REGISTER_METHOD(PDEFoam)
80 
82 
83 ////////////////////////////////////////////////////////////////////////////////
84 /// init PDEFoam objects
85 
87  const TString& methodTitle,
88  DataSetInfo& dsi,
89  const TString& theOption ) :
90  MethodBase( jobName, Types::kPDEFoam, methodTitle, dsi, theOption)
91  , fSigBgSeparated(kFALSE)
92  , fFrac(0.001)
93  , fDiscrErrCut(-1.0)
94  , fVolFrac(1.0/15.0)
95  , fnCells(999)
96  , fnActiveCells(500)
97  , fnSampl(2000)
98  , fnBin(5)
99  , fEvPerBin(10000)
100  , fCompress(kTRUE)
101  , fMultiTargetRegression(kFALSE)
102  , fNmin(100)
103  , fCutNmin(kTRUE)
104  , fMaxDepth(0)
105  , fKernelStr("None")
106  , fKernel(kNone)
107  , fKernelEstimator(NULL)
108  , fTargetSelectionStr("Mean")
109  , fTargetSelection(kMean)
110  , fFillFoamWithOrigWeights(kFALSE)
111  , fUseYesNoCell(kFALSE)
112  , fDTLogic("None")
113  , fDTSeparation(kFoam)
114  , fPeekMax(kTRUE)
115  , fXmin()
116  , fXmax()
117  , fFoam()
118 {
119 }
120 
121 ////////////////////////////////////////////////////////////////////////////////
122 /// constructor from weight file
123 
125  const TString& theWeightFile) :
126  MethodBase( Types::kPDEFoam, dsi, theWeightFile)
127  , fSigBgSeparated(kFALSE)
128  , fFrac(0.001)
129  , fDiscrErrCut(-1.0)
130  , fVolFrac(1.0/15.0)
131  , fnCells(999)
132  , fnActiveCells(500)
133  , fnSampl(2000)
134  , fnBin(5)
135  , fEvPerBin(10000)
136  , fCompress(kTRUE)
137  , fMultiTargetRegression(kFALSE)
138  , fNmin(100)
139  , fCutNmin(kTRUE)
140  , fMaxDepth(0)
141  , fKernelStr("None")
142  , fKernel(kNone)
143  , fKernelEstimator(NULL)
144  , fTargetSelectionStr("Mean")
145  , fTargetSelection(kMean)
146  , fFillFoamWithOrigWeights(kFALSE)
147  , fUseYesNoCell(kFALSE)
148  , fDTLogic("None")
149  , fDTSeparation(kFoam)
150  , fPeekMax(kTRUE)
151  , fXmin()
152  , fXmax()
153  , fFoam()
154 {
155 }
156 
157 ////////////////////////////////////////////////////////////////////////////////
158 /// PDEFoam can handle classification with multiple classes and regression
159 /// with one or more regression-targets
160 
162 {
163  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
164  if (type == Types::kMulticlass ) return kTRUE;
165  if (type == Types::kRegression) return kTRUE;
166  return kFALSE;
167 }
168 
169 ////////////////////////////////////////////////////////////////////////////////
170 /// default initialization called by all constructors
171 
173 {
174  // init PDEFoam options
175  fSigBgSeparated = kFALSE; // default: unified foam
176  fFrac = 0.001; // fraction of outlier events
177  fDiscrErrCut = -1.; // cut on discriminator error
178  fVolFrac = 1./15.; // range searching box size
179  fnActiveCells = 500; // number of active cells to create
180  fnCells = fnActiveCells*2-1; // total number of cells
181  fnSampl = 2000; // number of sampling points in cell
182  fnBin = 5; // number of bins in edge histogram
183  fEvPerBin = 10000; // number of events per bin
184  fNmin = 100; // minimum number of events in cell
185  fMaxDepth = 0; // cell tree depth (default: unlimited)
186  fFillFoamWithOrigWeights = kFALSE; // fill orig. weights into foam
187  fUseYesNoCell = kFALSE; // return -1 or 1 for bg or signal events
188  fDTLogic = "None"; // decision tree algorithmus
189  fDTSeparation = kFoam; // separation type
190 
191  fKernel = kNone; // default: use no kernel
192  fKernelEstimator= NULL; // kernel estimator used during evaluation
193  fTargetSelection= kMean; // default: use mean for target selection (only multi target regression!)
194 
195  fCompress = kTRUE; // compress ROOT output file
196  fMultiTargetRegression = kFALSE; // multi-target regression
197 
198  DeleteFoams();
199 
200  if (fUseYesNoCell)
201  SetSignalReferenceCut( 0.0 ); // MVA output in [-1, 1]
202  else
203  SetSignalReferenceCut( 0.5 ); // MVA output in [0, 1]
204 }
205 
206 ////////////////////////////////////////////////////////////////////////////////
207 ///
208 /// Declare MethodPDEFoam options
209 ///
210 
212 {
213  DeclareOptionRef( fSigBgSeparated = kFALSE, "SigBgSeparate", "Separate foams for signal and background" );
214  DeclareOptionRef( fFrac = 0.001, "TailCut", "Fraction of outlier events that are excluded from the foam in each dimension" );
215  DeclareOptionRef( fVolFrac = 1./15., "VolFrac", "Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)");
216  DeclareOptionRef( fnActiveCells = 500, "nActiveCells", "Maximum number of active cells to be created by the foam");
217  DeclareOptionRef( fnSampl = 2000, "nSampl", "Number of generated MC events per cell");
218  DeclareOptionRef( fnBin = 5, "nBin", "Number of bins in edge histograms");
219  DeclareOptionRef( fCompress = kTRUE, "Compress", "Compress foam output file");
220  DeclareOptionRef( fMultiTargetRegression = kFALSE, "MultiTargetRegression", "Do regression with multiple targets");
221  DeclareOptionRef( fNmin = 100, "Nmin", "Number of events in cell required to split cell");
222  DeclareOptionRef( fMaxDepth = 0, "MaxDepth", "Maximum depth of cell tree (0=unlimited)");
223  DeclareOptionRef( fFillFoamWithOrigWeights = kFALSE, "FillFoamWithOrigWeights", "Fill foam with original or boost weights");
224  DeclareOptionRef( fUseYesNoCell = kFALSE, "UseYesNoCell", "Return -1 or 1 for bkg or signal like events");
225  DeclareOptionRef( fDTLogic = "None", "DTLogic", "Use decision tree algorithm to split cells");
226  AddPreDefVal(TString("None"));
227  AddPreDefVal(TString("GiniIndex"));
228  AddPreDefVal(TString("MisClassificationError"));
229  AddPreDefVal(TString("CrossEntropy"));
230  AddPreDefVal(TString("GiniIndexWithLaplace"));
231  AddPreDefVal(TString("SdivSqrtSplusB"));
232 
233  DeclareOptionRef( fKernelStr = "None", "Kernel", "Kernel type used");
234  AddPreDefVal(TString("None"));
235  AddPreDefVal(TString("Gauss"));
236  AddPreDefVal(TString("LinNeighbors"));
237  DeclareOptionRef( fTargetSelectionStr = "Mean", "TargetSelection", "Target selection method");
238  AddPreDefVal(TString("Mean"));
239  AddPreDefVal(TString("Mpv"));
240 }
241 
242 
243 ////////////////////////////////////////////////////////////////////////////////
244 /// options that are used ONLY for the READER to ensure backward compatibility
245 
248  DeclareOptionRef(fCutNmin = kTRUE, "CutNmin", "Requirement for minimal number of events in cell");
249  DeclareOptionRef(fPeekMax = kTRUE, "PeekMax", "Peek cell with max. loss for the next split");
250 }
251 
252 ////////////////////////////////////////////////////////////////////////////////
253 /// process user options
254 
256 {
257  if (!(fFrac>=0. && fFrac<=1.)) {
258  Log() << kWARNING << "TailCut not in [0.,1] ==> using 0.001 instead" << Endl;
259  fFrac = 0.001;
260  }
261 
262  if (fnActiveCells < 1) {
263  Log() << kWARNING << "invalid number of active cells specified: "
264  << fnActiveCells << "; setting nActiveCells=2" << Endl;
265  fnActiveCells = 2;
266  }
267  fnCells = fnActiveCells*2-1;
268 
269  // DT logic is only applicable if a single foam is trained
270  if (fSigBgSeparated && fDTLogic != "None") {
271  Log() << kFATAL << "Decision tree logic works only for a single foam (SigBgSeparate=F)" << Endl;
272  }
273 
274  // set separation to use
275  if (fDTLogic == "None")
277  else if (fDTLogic == "GiniIndex")
279  else if (fDTLogic == "MisClassificationError")
281  else if (fDTLogic == "CrossEntropy")
283  else if (fDTLogic == "GiniIndexWithLaplace")
285  else if (fDTLogic == "SdivSqrtSplusB")
287  else {
288  Log() << kWARNING << "Unknown separation type: " << fDTLogic
289  << ", setting to None" << Endl;
290  fDTLogic = "None";
292  }
293 
294  if (fKernelStr == "None" ) fKernel = kNone;
295  else if (fKernelStr == "Gauss" ) fKernel = kGaus;
296  else if (fKernelStr == "LinNeighbors") fKernel = kLinN;
297 
298  if (fTargetSelectionStr == "Mean" ) fTargetSelection = kMean;
299  else fTargetSelection = kMpv;
300  // sanity check: number of targets > 1 and MultiTargetRegression=F
301  // makes no sense --> set MultiTargetRegression=T
302  if (DoRegression() && Data()->GetNTargets() > 1 && !fMultiTargetRegression) {
303  Log() << kWARNING << "Warning: number of targets > 1"
304  << " and MultiTargetRegression=F was set, this makes no sense!"
305  << " --> I'm setting MultiTargetRegression=T" << Endl;
307  }
308 }
309 
310 ////////////////////////////////////////////////////////////////////////////////
311 /// destructor
312 
314 {
315  DeleteFoams();
316 
317  if (fKernelEstimator != NULL)
318  delete fKernelEstimator;
319 }
320 
321 ////////////////////////////////////////////////////////////////////////////////
322 /// Determine foam range [fXmin, fXmax] for all dimensions, such
323 /// that a fraction of 'fFrac' events lie outside the foam.
324 
326 {
327  fXmin.clear();
328  fXmax.clear();
329  UInt_t kDim = GetNvar(); // == Data()->GetNVariables();
330  UInt_t tDim = Data()->GetNTargets();
331  UInt_t vDim = Data()->GetNVariables();
333  kDim += tDim;
334 
335  Float_t *xmin = new Float_t[kDim];
336  Float_t *xmax = new Float_t[kDim];
337 
338  // set default values
339  for (UInt_t dim=0; dim<kDim; dim++) {
340  xmin[dim] = FLT_MAX;
341  xmax[dim] = FLT_MIN;
342  }
343 
344  Log() << kDEBUG << "Number of training events: " << Data()->GetNTrainingEvents() << Endl;
345  Int_t nevoutside = (Int_t)((Data()->GetNTrainingEvents())*(fFrac)); // number of events that are outside the range
346  Int_t rangehistbins = 10000; // number of bins in histos
347 
348  // loop over all testing singnal and BG events and clac minimal and
349  // maximal value of every variable
350  for (Long64_t i=0; i<(GetNEvents()); i++) { // events loop
351  const Event* ev = GetEvent(i);
352  for (UInt_t dim=0; dim<kDim; dim++) { // variables loop
353  Float_t val;
355  if (dim < vDim)
356  val = ev->GetValue(dim);
357  else
358  val = ev->GetTarget(dim-vDim);
359  }
360  else
361  val = ev->GetValue(dim);
362 
363  if (val<xmin[dim])
364  xmin[dim] = val;
365  if (val>xmax[dim])
366  xmax[dim] = val;
367  }
368  }
369 
370  // Create and fill histograms for each dimension (with same events
371  // as before), to determine range based on number of events outside
372  // the range
373  TH1F **range_h = new TH1F*[kDim];
374  for (UInt_t dim=0; dim<kDim; dim++) {
375  range_h[dim] = new TH1F(Form("range%i", dim), "range", rangehistbins, xmin[dim], xmax[dim]);
376  }
377 
378  // fill all testing events into histos
379  for (Long64_t i=0; i<GetNEvents(); i++) {
380  const Event* ev = GetEvent(i);
381  for (UInt_t dim=0; dim<kDim; dim++) {
383  if (dim < vDim)
384  range_h[dim]->Fill(ev->GetValue(dim));
385  else
386  range_h[dim]->Fill(ev->GetTarget(dim-vDim));
387  }
388  else
389  range_h[dim]->Fill(ev->GetValue(dim));
390  }
391  }
392 
393  // calc Xmin, Xmax from Histos
394  for (UInt_t dim=0; dim<kDim; dim++) {
395  for (Int_t i=1; i<(rangehistbins+1); i++) { // loop over bins
396  if (range_h[dim]->Integral(0, i) > nevoutside) { // calc left limit (integral over bins 0..i = nevoutside)
397  xmin[dim]=range_h[dim]->GetBinLowEdge(i);
398  break;
399  }
400  }
401  for (Int_t i=rangehistbins; i>0; i--) { // calc right limit (integral over bins i..max = nevoutside)
402  if (range_h[dim]->Integral(i, (rangehistbins+1)) > nevoutside) {
403  xmax[dim]=range_h[dim]->GetBinLowEdge(i+1);
404  break;
405  }
406  }
407  }
408  // now xmin[] and xmax[] contain upper/lower limits for every dimension
409 
410  // copy xmin[], xmax[] values to the class variable
411  fXmin.clear();
412  fXmax.clear();
413  for (UInt_t dim=0; dim<kDim; dim++) {
414  fXmin.push_back(xmin[dim]);
415  fXmax.push_back(xmax[dim]);
416  }
417 
418 
419  delete[] xmin;
420  delete[] xmax;
421 
422  // delete histos
423  for (UInt_t dim=0; dim<kDim; dim++)
424  delete range_h[dim];
425  delete[] range_h;
426 
427  return;
428 }
429 
430 ////////////////////////////////////////////////////////////////////////////////
431 /// Train PDE-Foam depending on the set options
432 
434 {
435  Log() << kVERBOSE << "Calculate Xmin and Xmax for every dimension" << Endl;
436  CalcXminXmax();
437 
438  // delete foams
439  DeleteFoams();
440 
441  // start training
442  if (DoRegression()) {
445  else
447  }
448  else {
449  if (DoMulticlass())
451  else {
452  if (DataInfo().GetNormalization() != "EQUALNUMEVENTS" ) {
453  Log() << kHEADER << "NormMode=" << DataInfo().GetNormalization()
454  << " chosen. Note that only NormMode=EqualNumEvents"
455  << " ensures that Discriminant values correspond to"
456  << " signal probabilities." << Endl;
457  }
458 
459  Log() << kDEBUG << "N_sig for training events: " << Data()->GetNEvtSigTrain() << Endl;
460  Log() << kDEBUG << "N_bg for training events: " << Data()->GetNEvtBkgdTrain() << Endl;
461  Log() << kDEBUG << "User normalization: " << DataInfo().GetNormalization().Data() << Endl;
462 
463  if (fSigBgSeparated)
465  else
467  }
468  }
469 
470  // delete the binary search tree in order to save memory
471  for(UInt_t i=0; i<fFoam.size(); i++) {
472  if(fFoam.at(i))
473  fFoam.at(i)->DeleteBinarySearchTree();
474  }
476 }
477 
478 ////////////////////////////////////////////////////////////////////////////////
479 /// Creation of 2 separated foams: one for signal events, one for
480 /// backgound events. At the end the foam cells of fFoam[0] will
481 /// contain the average number of signal events and fFoam[1] will
482 /// contain the average number of background events.
483 
485 {
486  TString foamcaption[2];
487  foamcaption[0] = "SignalFoam";
488  foamcaption[1] = "BgFoam";
489 
490  for(int i=0; i<2; i++) {
491  // create 2 PDEFoams
492  fFoam.push_back( InitFoam(foamcaption[i], kSeparate) );
493 
494  Log() << kVERBOSE << "Filling binary search tree of " << foamcaption[i]
495  << " with events" << Endl;
496  // insert event to BinarySearchTree
497  for (Long64_t k=0; k<GetNEvents(); ++k) {
498  const Event* ev = GetEvent(k);
499  if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
500  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
501  fFoam.back()->FillBinarySearchTree(ev);
502  }
503 
504  Log() << kINFO << "Build up " << foamcaption[i] << Endl;
505  fFoam.back()->Create(); // build foam
506 
507  Log() << kVERBOSE << "Filling foam cells with events" << Endl;
508  // loop over all events -> fill foam cells
509  for (Long64_t k=0; k<GetNEvents(); ++k) {
510  const Event* ev = GetEvent(k);
512  if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
513  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
514  fFoam.back()->FillFoamCells(ev, weight);
515  }
516  }
517 }
518 
519 ////////////////////////////////////////////////////////////////////////////////
520 /// Create only one unified foam (fFoam[0]) whose cells contain the
521 /// average discriminator (N_sig)/(N_sig + N_bg)
522 
524 {
525  fFoam.push_back( InitFoam("DiscrFoam", kDiscr, fSignalClass) );
526 
527  Log() << kVERBOSE << "Filling binary search tree of discriminator foam with events" << Endl;
528  // insert event to BinarySearchTree
529  for (Long64_t k=0; k<GetNEvents(); ++k) {
530  const Event* ev = GetEvent(k);
531  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
532  fFoam.back()->FillBinarySearchTree(ev);
533  }
534 
535  Log() << kINFO << "Build up discriminator foam" << Endl;
536  fFoam.back()->Create(); // build foam
537 
538  Log() << kVERBOSE << "Filling foam cells with events" << Endl;
539  // loop over all training events -> fill foam cells with N_sig and N_Bg
540  for (Long64_t k=0; k<GetNEvents(); ++k) {
541  const Event* ev = GetEvent(k);
543  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
544  fFoam.back()->FillFoamCells(ev, weight);
545  }
546 
547  Log() << kVERBOSE << "Calculate cell discriminator"<< Endl;
548  // calc discriminator (and it's error) for each cell
549  fFoam.back()->Finalize();
550 }
551 
552 ////////////////////////////////////////////////////////////////////////////////
553 /// Create one unified foam (see TrainUnifiedClassification()) for
554 /// each class, where the cells of foam i (fFoam[i]) contain the
555 /// average fraction of events of class i, i.e.
556 ///
557 /// D = number events of class i / total number of events
558 
560 {
561  for (UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
562 
563  fFoam.push_back( InitFoam(Form("MultiClassFoam%u",iClass), kMultiClass, iClass) );
564 
565  Log() << kVERBOSE << "Filling binary search tree of multiclass foam "
566  << iClass << " with events" << Endl;
567  // insert event to BinarySearchTree
568  for (Long64_t k=0; k<GetNEvents(); ++k) {
569  const Event* ev = GetEvent(k);
570  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
571  fFoam.back()->FillBinarySearchTree(ev);
572  }
573 
574  Log() << kINFO << "Build up multiclass foam " << iClass << Endl;
575  fFoam.back()->Create(); // build foam
576 
577  Log() << kVERBOSE << "Filling foam cells with events" << Endl;
578  // loop over all training events and fill foam cells with signal
579  // and background events
580  for (Long64_t k=0; k<GetNEvents(); ++k) {
581  const Event* ev = GetEvent(k);
583  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
584  fFoam.back()->FillFoamCells(ev, weight);
585  }
586 
587  Log() << kVERBOSE << "Calculate cell discriminator"<< Endl;
588  // calc discriminator (and it's error) for each cell
589  fFoam.back()->Finalize();
590  }
591 }
592 
593 ////////////////////////////////////////////////////////////////////////////////
594 /// Training one (mono target regression) foam, whose cells contain
595 /// the average 0th target. The dimension of the foam = number of
596 /// non-targets (= number of variables).
597 
599 {
600  if (Data()->GetNTargets() != 1) {
601  Log() << kFATAL << "Can't do mono-target regression with "
602  << Data()->GetNTargets() << " targets!" << Endl;
603  }
604 
605  Log() << kDEBUG << "MethodPDEFoam: number of Targets: " << Data()->GetNTargets() << Endl;
606 
607  fFoam.push_back( InitFoam("MonoTargetRegressionFoam", kMonoTarget) );
608 
609  Log() << kVERBOSE << "Filling binary search tree with events" << Endl;
610  // insert event to BinarySearchTree
611  for (Long64_t k=0; k<GetNEvents(); ++k) {
612  const Event* ev = GetEvent(k);
613  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
614  fFoam.back()->FillBinarySearchTree(ev);
615  }
616 
617  Log() << kINFO << "Build mono target regression foam" << Endl;
618  fFoam.back()->Create(); // build foam
619 
620  Log() << kVERBOSE << "Filling foam cells with events" << Endl;
621  // loop over all events -> fill foam cells with target
622  for (Long64_t k=0; k<GetNEvents(); ++k) {
623  const Event* ev = GetEvent(k);
625  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
626  fFoam.back()->FillFoamCells(ev, weight);
627  }
628 
629  Log() << kVERBOSE << "Calculate average cell targets"<< Endl;
630  // calc weight (and it's error) for each cell
631  fFoam.back()->Finalize();
632 }
633 
634 ////////////////////////////////////////////////////////////////////////////////
635 /// Training one (multi target regression) foam, whose cells contain
636 /// the average event density. The dimension of the foam = number
637 /// of non-targets + number of targets.
638 
640 {
641  Log() << kDEBUG << "Number of variables: " << Data()->GetNVariables() << Endl;
642  Log() << kDEBUG << "Number of Targets: " << Data()->GetNTargets() << Endl;
643  Log() << kDEBUG << "Dimension of foam: " << Data()->GetNVariables()+Data()->GetNTargets() << Endl;
644  if (fKernel==kLinN)
645  Log() << kFATAL << "LinNeighbors kernel currently not supported"
646  << " for multi target regression" << Endl;
647 
648  fFoam.push_back( InitFoam("MultiTargetRegressionFoam", kMultiTarget) );
649 
650  Log() << kVERBOSE << "Filling binary search tree of multi target regression foam with events"
651  << Endl;
652  // insert event to BinarySearchTree
653  for (Long64_t k=0; k<GetNEvents(); ++k) {
654  Event *ev = new Event(*GetEvent(k));
655  // since in multi-target regression targets are handled like
656  // variables --> remove targets and add them to the event variabels
657  std::vector<Float_t> targets(ev->GetTargets());
658  const UInt_t nVariables = ev->GetValues().size();
659  for (UInt_t i = 0; i < targets.size(); ++i)
660  ev->SetVal(i+nVariables, targets.at(i));
661  ev->GetTargets().clear();
662  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
663  fFoam.back()->FillBinarySearchTree(ev);
664  // since the binary search tree copies the event, one can delete
665  // it
666  delete ev;
667  }
668 
669  Log() << kINFO << "Build multi target regression foam" << Endl;
670  fFoam.back()->Create(); // build foam
671 
672  Log() << kVERBOSE << "Filling foam cells with events" << Endl;
673  // loop over all events -> fill foam cells with number of events
674  for (Long64_t k=0; k<GetNEvents(); ++k) {
675  Event *ev = new Event(*GetEvent(k));
676  // since in multi-target regression targets are handled like
677  // variables --> remove targets and add them to the event variabels
678  std::vector<Float_t> targets = ev->GetTargets();
679  const UInt_t nVariables = ev->GetValues().size();
681  for (UInt_t i = 0; i < targets.size(); ++i)
682  ev->SetVal(i+nVariables, targets.at(i));
683  ev->GetTargets().clear();
684  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
685  fFoam.back()->FillFoamCells(ev, weight);
686  // since the PDEFoam copies the event, one can delete it
687  delete ev;
688  }
689 }
690 
691 ////////////////////////////////////////////////////////////////////////////////
692 /// Return Mva-Value.
693 ///
694 /// In case of 'fSigBgSeparated==false' (one unifiend PDEFoam was
695 /// trained) the function returns the content of the cell, which
696 /// corresponds to the current TMVA::Event, i.e. D =
697 /// N_sig/(N_bg+N_sig).
698 ///
699 /// In case of 'fSigBgSeparated==true' (two separate PDEFoams were
700 /// trained) the function returns
701 ///
702 /// D = Density_sig/(Density_sig+Density_bg)
703 ///
704 /// where 'Density_sig' is the content of the cell in the signal
705 /// PDEFoam (fFoam[0]) and 'Density_bg' is the content of the cell
706 /// in the background PDEFoam (fFoam[1]).
707 ///
708 /// In both cases the error on the discriminant is stored in 'err'
709 /// and 'errUpper'. (Of course err and errUpper must be non-zero
710 /// and point to valid address to make this work.)
711 
713 {
714  const Event* ev = GetEvent();
715  Double_t discr = 0.;
716 
717  if (fSigBgSeparated) {
718  std::vector<Float_t> xvec = ev->GetValues();
719 
720  Double_t density_sig = 0.; // calc signal event density
721  Double_t density_bg = 0.; // calc background event density
722  density_sig = fFoam.at(0)->GetCellValue(xvec, kValueDensity, fKernelEstimator);
723  density_bg = fFoam.at(1)->GetCellValue(xvec, kValueDensity, fKernelEstimator);
724 
725  // calc disciminator (normed!)
726  if ( (density_sig+density_bg) > 0 )
727  discr = density_sig/(density_sig+density_bg);
728  else
729  discr = 0.5; // assume 50% signal probability, if no events found (bad assumption, but can be overruled by cut on error)
730  }
731  else { // Signal and Bg not separated
732  // get discriminator direct from the foam
733  discr = fFoam.at(0)->GetCellValue(ev->GetValues(), kValue, fKernelEstimator);
734  }
735 
736  // calculate the error
737  if (err || errUpper) {
738  const Double_t discr_error = CalculateMVAError();
739  if (err != 0) *err = discr_error;
740  if (errUpper != 0) *errUpper = discr_error;
741  }
742 
743  if (fUseYesNoCell)
744  return (discr < 0.5 ? -1 : 1);
745  else
746  return discr;
747 }
748 
749 ////////////////////////////////////////////////////////////////////////////////
750 /// Calculate the error on the Mva value
751 ///
752 /// If fSigBgSeparated == true the error is calculated from the
753 /// number of events in the signal and background PDEFoam cells.
754 ///
755 /// If fSigBgSeparated == false, the error is taken directly from
756 /// the PDEFoam cell.
757 
759 {
760  const Event* ev = GetEvent(); // current event
761  Double_t mvaError = 0.0; // the error on the Mva value
762 
763  if (fSigBgSeparated) {
764  const std::vector<Float_t>& xvec = ev->GetValues();
765 
766  const Double_t neventsB = fFoam.at(1)->GetCellValue(xvec, kValue, fKernelEstimator);
767  const Double_t neventsS = fFoam.at(0)->GetCellValue(xvec, kValue, fKernelEstimator);
768  const Double_t scaleB = 1.;
769  // estimation of statistical error on counted signal/background events
770  const Double_t errorS = neventsS == 0 ? 1.0 : TMath::Sqrt(neventsS);
771  const Double_t errorB = neventsB == 0 ? 1.0 : TMath::Sqrt(neventsB);
772 
773  if ((neventsS > 1e-10) || (neventsB > 1e-10)) {
774  // eq. (5) in paper T.Carli, B.Koblitz 2002
775  mvaError = TMath::Sqrt(Sqr(scaleB * neventsB / Sqr(neventsS + scaleB * neventsB) * errorS) +
776  Sqr(scaleB * neventsS / Sqr(neventsS + scaleB * neventsB) * errorB));
777  } else {
778  mvaError = 1.0;
779  }
780  } else { // Signal and Bg not separated
781  // get discriminator error direct from the foam
782  mvaError = fFoam.at(0)->GetCellValue(ev->GetValues(), kValueError, fKernelEstimator);
783  }
784 
785  return mvaError;
786 }
787 
788 ////////////////////////////////////////////////////////////////////////////////
789 /// Get the multiclass MVA response for the PDEFoam classifier. The
790 /// returned MVA values are normalized, i.e. their sum equals 1.
791 
792 const std::vector<Float_t>& TMVA::MethodPDEFoam::GetMulticlassValues()
793 {
794  const TMVA::Event *ev = GetEvent();
795  std::vector<Float_t> xvec = ev->GetValues();
796 
797  if (fMulticlassReturnVal == NULL)
798  fMulticlassReturnVal = new std::vector<Float_t>();
799  fMulticlassReturnVal->clear();
800  fMulticlassReturnVal->reserve(DataInfo().GetNClasses());
801 
802  std::vector<Float_t> temp; // temp class. values
803  UInt_t nClasses = DataInfo().GetNClasses();
804  temp.reserve(nClasses);
805  for (UInt_t iClass = 0; iClass < nClasses; ++iClass) {
806  temp.push_back(fFoam.at(iClass)->GetCellValue(xvec, kValue, fKernelEstimator));
807  }
808 
809  for (UInt_t iClass = 0; iClass < nClasses; ++iClass) {
810  Float_t norm = 0.0; // normalization
811  for (UInt_t j = 0; j < nClasses; ++j) {
812  if (iClass != j)
813  norm += exp(temp[j] - temp[iClass]);
814  }
815  fMulticlassReturnVal->push_back(1.0 / (1.0 + norm));
816  }
817 
818  return *fMulticlassReturnVal;
819 }
820 
821 ////////////////////////////////////////////////////////////////////////////////
822 /// Compute ranking of input variables from the number of cuts made
823 /// in each PDEFoam dimension. The PDEFoam dimension (the variable)
824 /// for which the most cuts were done is ranked highest.
825 
827 {
828  // create the ranking object
829  fRanking = new Ranking(GetName(), "Variable Importance");
830  std::vector<Float_t> importance(GetNvar(), 0);
831 
832  // determine variable importances
833  for (UInt_t ifoam = 0; ifoam < fFoam.size(); ++ifoam) {
834  // get the number of cuts made in every dimension of foam
835  PDEFoamCell *root_cell = fFoam.at(ifoam)->GetRootCell();
836  std::vector<UInt_t> nCuts(fFoam.at(ifoam)->GetTotDim(), 0);
837  GetNCuts(root_cell, nCuts);
838 
839  // fill the importance vector (ignoring the target dimensions in
840  // case of a multi-target regression foam)
841  UInt_t sumOfCuts = 0;
842  std::vector<Float_t> tmp_importance;
843  for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
844  sumOfCuts += nCuts.at(ivar);
845  tmp_importance.push_back( nCuts.at(ivar) );
846  }
847  // normalization of the variable importances of this foam: the
848  // sum of all variable importances equals 1 for this foam
849  for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
850  if (sumOfCuts > 0)
851  tmp_importance.at(ivar) /= sumOfCuts;
852  else
853  tmp_importance.at(ivar) = 0;
854  }
855  // the overall variable importance is the average over all foams
856  for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
857  importance.at(ivar) += tmp_importance.at(ivar) / fFoam.size();
858  }
859  }
860 
861  // fill ranking vector
862  for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
863  fRanking->AddRank(Rank(GetInputLabel(ivar), importance.at(ivar)));
864  }
865 
866  return fRanking;
867 }
868 
869 ////////////////////////////////////////////////////////////////////////////////
870 /// Fill in 'nCuts' the number of cuts made in every foam dimension,
871 /// starting at the root cell 'cell'.
872 ///
873 /// Parameters:
874 ///
875 /// - cell - root cell to start the counting from
876 ///
877 /// - nCuts - the number of cuts are saved in this vector
878 
879 void TMVA::MethodPDEFoam::GetNCuts(PDEFoamCell *cell, std::vector<UInt_t> &nCuts)
880 {
881  if (cell == NULL || cell->GetStat() == 1) // cell is active
882  return;
883 
884  nCuts.at(cell->GetBest())++;
885 
886  if (cell->GetDau0() != NULL)
887  GetNCuts(cell->GetDau0(), nCuts);
888  if (cell->GetDau1() != NULL)
889  GetNCuts(cell->GetDau1(), nCuts);
890 }
891 
892 ////////////////////////////////////////////////////////////////////////////////
893 /// Set Xmin, Xmax for every dimension in the given pdefoam object
894 
896 {
897  if (!pdefoam){
898  Log() << kFATAL << "Null pointer given!" << Endl;
899  return;
900  }
901 
902  UInt_t num_vars = GetNvar();
904  num_vars += Data()->GetNTargets();
905 
906  for (UInt_t idim=0; idim<num_vars; idim++) { // set upper/ lower limit in foam
907  Log()<< kDEBUG << "foam: SetXmin[dim="<<idim<<"]: " << fXmin.at(idim) << Endl;
908  Log()<< kDEBUG << "foam: SetXmax[dim="<<idim<<"]: " << fXmax.at(idim) << Endl;
909  pdefoam->SetXmin(idim, fXmin.at(idim));
910  pdefoam->SetXmax(idim, fXmax.at(idim));
911  }
912 }
913 
914 ////////////////////////////////////////////////////////////////////////////////
915 /// Create a new PDEFoam, set the PDEFoam options (nCells, nBin,
916 /// Xmin, Xmax, etc.) and initialize the PDEFoam by calling
917 /// pdefoam->Initialize().
918 ///
919 /// Parameters:
920 ///
921 /// - foamcaption - name of PDEFoam object
922 ///
923 /// - ft - type of PDEFoam
924 /// Candidates are:
925 /// - kSeparate - creates TMVA::PDEFoamEvent
926 /// - kDiscr - creates TMVA::PDEFoamDiscriminant
927 /// - kMonoTarget - creates TMVA::PDEFoamTarget
928 /// - kMultiTarget - creates TMVA::MultiTarget
929 /// - kMultiClass - creates TMVA::PDEFoamDiscriminant
930 ///
931 /// If 'fDTSeparation != kFoam' then a TMVA::PDEFoamDecisionTree
932 /// is created (the separation type depends on fDTSeparation).
933 ///
934 /// - cls - marked event class (optional, default value = 0)
935 
937 {
938  // number of foam dimensions
939  Int_t dim = 1;
940  if (ft == kMultiTarget)
941  // dimension of foam = number of targets + non-targets
942  dim = Data()->GetNTargets() + Data()->GetNVariables();
943  else
944  dim = GetNvar();
945 
946  // calculate range-searching box
947  std::vector<Double_t> box;
948  for (Int_t idim = 0; idim < dim; ++idim) {
949  box.push_back((fXmax.at(idim) - fXmin.at(idim))* fVolFrac);
950  }
951 
952  // create PDEFoam and PDEFoamDensityBase
953  PDEFoam *pdefoam = NULL;
954  PDEFoamDensityBase *density = NULL;
955  if (fDTSeparation == kFoam) {
956  // use PDEFoam algorithm
957  switch (ft) {
958  case kSeparate:
959  pdefoam = new PDEFoamEvent(foamcaption);
960  density = new PDEFoamEventDensity(box);
961  break;
962  case kMultiTarget:
963  pdefoam = new PDEFoamMultiTarget(foamcaption, fTargetSelection);
964  density = new PDEFoamEventDensity(box);
965  break;
966  case kDiscr:
967  case kMultiClass:
968  pdefoam = new PDEFoamDiscriminant(foamcaption, cls);
969  density = new PDEFoamDiscriminantDensity(box, cls);
970  break;
971  case kMonoTarget:
972  pdefoam = new PDEFoamTarget(foamcaption, 0);
973  density = new PDEFoamTargetDensity(box, 0);
974  break;
975  default:
976  Log() << kFATAL << "Unknown PDEFoam type!" << Endl;
977  break;
978  }
979  } else {
980  // create a decision tree like PDEFoam
981 
982  // create separation type class, which is owned by
983  // PDEFoamDecisionTree (i.e. PDEFoamDecisionTree will delete it)
984  SeparationBase *sepType = NULL;
985  switch (fDTSeparation) {
986  case kGiniIndex:
987  sepType = new GiniIndex();
988  break;
990  sepType = new MisClassificationError();
991  break;
992  case kCrossEntropy:
993  sepType = new CrossEntropy();
994  break;
996  sepType = new GiniIndexWithLaplace();
997  break;
998  case kSdivSqrtSplusB:
999  sepType = new SdivSqrtSplusB();
1000  break;
1001  default:
1002  Log() << kFATAL << "Separation type " << fDTSeparation
1003  << " currently not supported" << Endl;
1004  break;
1005  }
1006  switch (ft) {
1007  case kDiscr:
1008  case kMultiClass:
1009  pdefoam = new PDEFoamDecisionTree(foamcaption, sepType, cls);
1010  density = new PDEFoamDecisionTreeDensity(box, cls);
1011  break;
1012  default:
1013  Log() << kFATAL << "Decision tree cell split algorithm is only"
1014  << " available for (multi) classification with a single"
1015  << " PDE-Foam (SigBgSeparate=F)" << Endl;
1016  break;
1017  }
1018  }
1019 
1020  if (pdefoam) pdefoam->SetDensity(density);
1021  else Log() << kFATAL << "PDEFoam pointer not set, exiting.." << Endl;
1022 
1023  // create pdefoam kernel
1025 
1026  // set fLogger attributes
1027  pdefoam->Log().SetMinType(this->Log().GetMinType());
1028 
1029  // set PDEFoam parameters
1030  pdefoam->SetDim( dim);
1031  pdefoam->SetnCells( fnCells); // optional
1032  pdefoam->SetnSampl( fnSampl); // optional
1033  pdefoam->SetnBin( fnBin); // optional
1034  pdefoam->SetEvPerBin( fEvPerBin); // optional
1035 
1036  // cuts
1037  pdefoam->SetNmin(fNmin);
1038  pdefoam->SetMaxDepth(fMaxDepth); // maximum cell tree depth
1039 
1040  // Init PDEFoam
1041  pdefoam->Initialize();
1042 
1043  // Set Xmin, Xmax
1044  SetXminXmax(pdefoam);
1045 
1046  return pdefoam;
1047 }
1048 
1049 ////////////////////////////////////////////////////////////////////////////////
1050 /// Return regression values for both multi- and mono-target regression
1051 
1052 const std::vector<Float_t>& TMVA::MethodPDEFoam::GetRegressionValues()
1053 {
1054  if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>();
1055  fRegressionReturnVal->clear();
1056  fRegressionReturnVal->reserve(Data()->GetNTargets());
1057 
1058  const Event* ev = GetEvent();
1059  std::vector<Float_t> vals = ev->GetValues(); // get array of event variables (non-targets)
1060 
1061  if (vals.empty()) {
1062  Log() << kWARNING << "<GetRegressionValues> value vector is empty. " << Endl;
1063  }
1064 
1065  if (fMultiTargetRegression) {
1066  // create std::map from event variables
1067  std::map<Int_t, Float_t> xvec;
1068  for (UInt_t i=0; i<vals.size(); ++i)
1069  xvec.insert(std::pair<Int_t, Float_t>(i, vals.at(i)));
1070  // get the targets
1071  std::vector<Float_t> targets = fFoam.at(0)->GetCellValue( xvec, kValue );
1072 
1073  // sanity check
1074  if (targets.size() != Data()->GetNTargets())
1075  Log() << kFATAL << "Something wrong with multi-target regression foam: "
1076  << "number of targets does not match the DataSet()" << Endl;
1077  for(UInt_t i=0; i<targets.size(); i++)
1078  fRegressionReturnVal->push_back(targets.at(i));
1079  }
1080  else {
1081  fRegressionReturnVal->push_back(fFoam.at(0)->GetCellValue(vals, kValue, fKernelEstimator));
1082  }
1083 
1084  // apply inverse transformation to regression values
1085  Event * evT = new Event(*ev);
1086  for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
1087  evT->SetTarget(itgt, fRegressionReturnVal->at(itgt) );
1088  }
1089  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
1090  fRegressionReturnVal->clear();
1091  for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
1092  fRegressionReturnVal->push_back( evT2->GetTarget(itgt) );
1093  }
1094 
1095  delete evT;
1096 
1097  return (*fRegressionReturnVal);
1098 }
1099 
1100 ////////////////////////////////////////////////////////////////////////////////
1101 /// create a pdefoam kernel estimator, depending on the current
1102 /// value of fKernel
1103 
1105 {
1106  switch (fKernel) {
1107  case kNone:
1108  return new PDEFoamKernelTrivial();
1109  case kLinN:
1110  return new PDEFoamKernelLinN();
1111  case kGaus:
1112  return new PDEFoamKernelGauss(fVolFrac/2.0);
1113  default:
1114  Log() << kFATAL << "Kernel: " << fKernel << " not supported!" << Endl;
1115  return NULL;
1116  }
1117  return NULL;
1118 }
1119 
1120 ////////////////////////////////////////////////////////////////////////////////
1121 /// Deletes all trained foams
1122 
1124 {
1125  for (UInt_t i=0; i<fFoam.size(); i++)
1126  if (fFoam.at(i)) delete fFoam.at(i);
1127  fFoam.clear();
1128 }
1129 
1130 ////////////////////////////////////////////////////////////////////////////////
1131 /// reset MethodPDEFoam:
1132 /// - delete all PDEFoams
1133 /// - delete the kernel estimator
1134 
1136 {
1137  DeleteFoams();
1138 
1139  if (fKernelEstimator != NULL) {
1140  delete fKernelEstimator;
1142  }
1143 }
1144 
1145 ////////////////////////////////////////////////////////////////////////////////
1146 
1148 {}
1149 
1150 ////////////////////////////////////////////////////////////////////////////////
1151 /// create XML output of PDEFoam method variables
1152 
1153 void TMVA::MethodPDEFoam::AddWeightsXMLTo( void* parent ) const
1154 {
1155  void* wght = gTools().AddChild(parent, "Weights");
1156  gTools().AddAttr( wght, "SigBgSeparated", fSigBgSeparated );
1157  gTools().AddAttr( wght, "Frac", fFrac );
1158  gTools().AddAttr( wght, "DiscrErrCut", fDiscrErrCut );
1159  gTools().AddAttr( wght, "VolFrac", fVolFrac );
1160  gTools().AddAttr( wght, "nCells", fnCells );
1161  gTools().AddAttr( wght, "nSampl", fnSampl );
1162  gTools().AddAttr( wght, "nBin", fnBin );
1163  gTools().AddAttr( wght, "EvPerBin", fEvPerBin );
1164  gTools().AddAttr( wght, "Compress", fCompress );
1165  gTools().AddAttr( wght, "DoRegression", DoRegression() );
1166  gTools().AddAttr( wght, "CutNmin", fNmin>0 );
1167  gTools().AddAttr( wght, "Nmin", fNmin );
1168  gTools().AddAttr( wght, "CutRMSmin", false );
1169  gTools().AddAttr( wght, "RMSmin", 0.0 );
1170  gTools().AddAttr( wght, "Kernel", KernelToUInt(fKernel) );
1171  gTools().AddAttr( wght, "TargetSelection", TargetSelectionToUInt(fTargetSelection) );
1172  gTools().AddAttr( wght, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
1173  gTools().AddAttr( wght, "UseYesNoCell", fUseYesNoCell );
1174 
1175  // save foam borders Xmin[i], Xmax[i]
1176  void *xmin_wrap;
1177  for (UInt_t i=0; i<fXmin.size(); i++){
1178  xmin_wrap = gTools().AddChild( wght, "Xmin" );
1179  gTools().AddAttr( xmin_wrap, "Index", i );
1180  gTools().AddAttr( xmin_wrap, "Value", fXmin.at(i) );
1181  }
1182  void *xmax_wrap;
1183  for (UInt_t i=0; i<fXmax.size(); i++){
1184  xmax_wrap = gTools().AddChild( wght, "Xmax" );
1185  gTools().AddAttr( xmax_wrap, "Index", i );
1186  gTools().AddAttr( xmax_wrap, "Value", fXmax.at(i) );
1187  }
1188 
1189  // write foams to xml file
1190  WriteFoamsToFile();
1191 }
1192 
1193 ////////////////////////////////////////////////////////////////////////////////
1194 /// Write PDEFoams to file
1195 
1197 {
1198  // fill variable names into foam
1200 
1201  TString rfname( GetWeightFileName() );
1202 
1203  // replace in case of txt weight file
1204  rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );
1205 
1206  // add foam indicator to distinguish from main weight file
1207  rfname.ReplaceAll( ".xml", "_foams.root" );
1208 
1209  TFile *rootFile = 0;
1210  if (fCompress) rootFile = new TFile(rfname, "RECREATE", "foamfile", 9);
1211  else rootFile = new TFile(rfname, "RECREATE");
1212 
1213  // write the foams
1214  for (UInt_t i=0; i<fFoam.size(); ++i) {
1215  Log() << "writing foam " << fFoam.at(i)->GetFoamName().Data()
1216  << " to file" << Endl;
1217  fFoam.at(i)->Write(fFoam.at(i)->GetFoamName().Data());
1218  }
1219 
1220  rootFile->Close();
1221  Log() << kINFO << "Foams written to file: "
1222  << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1223 }
1224 
1225 ////////////////////////////////////////////////////////////////////////////////
1226 /// read options and internal parameters
1227 
1229 {
1230  istr >> fSigBgSeparated; // Seperate Sig and Bg, or not
1231  istr >> fFrac; // Fraction used for calc of Xmin, Xmax
1232  istr >> fDiscrErrCut; // cut on discrimant error
1233  istr >> fVolFrac; // volume fraction (used for density calculation during buildup)
1234  istr >> fnCells; // Number of Cells (500)
1235  istr >> fnSampl; // Number of MC events per cell in build-up (1000)
1236  istr >> fnBin; // Number of bins in build-up (100)
1237  istr >> fEvPerBin; // Maximum events (equiv.) per bin in buid-up (1000)
1238  istr >> fCompress; // compress output file
1239 
1240  Bool_t regr;
1241  istr >> regr; // regression foam
1243 
1244  Bool_t CutNmin, CutRMSmin; // dummy for backwards compatib.
1245  Float_t RMSmin; // dummy for backwards compatib.
1246  istr >> CutNmin; // cut on minimal number of events in cell
1247  istr >> fNmin;
1248  istr >> CutRMSmin; // cut on minimal RMS in cell
1249  istr >> RMSmin;
1250 
1251  UInt_t ker = 0;
1252  istr >> ker; // used kernel for GetMvaValue()
1253  fKernel = UIntToKernel(ker);
1254 
1255  UInt_t ts = 0;
1256  istr >> ts; // used method for target selection
1258 
1259  istr >> fFillFoamWithOrigWeights; // fill foam with original event weights
1260  istr >> fUseYesNoCell; // return -1 or 1 for bg or signal event
1261 
1262  // clear old range and prepare new range
1263  fXmin.clear();
1264  fXmax.clear();
1265  UInt_t kDim = GetNvar();
1267  kDim += Data()->GetNTargets();
1268  fXmin.assign(kDim, 0);
1269  fXmax.assign(kDim, 0);
1270 
1271  // read range
1272  for (UInt_t i=0; i<kDim; i++)
1273  istr >> fXmin.at(i);
1274  for (UInt_t i=0; i<kDim; i++)
1275  istr >> fXmax.at(i);
1276 
1277  // read pure foams from file
1279 }
1280 
1281 ////////////////////////////////////////////////////////////////////////////////
1282 /// read PDEFoam variables from xml weight file
1283 
1285 {
1286  gTools().ReadAttr( wghtnode, "SigBgSeparated", fSigBgSeparated );
1287  gTools().ReadAttr( wghtnode, "Frac", fFrac );
1288  gTools().ReadAttr( wghtnode, "DiscrErrCut", fDiscrErrCut );
1289  gTools().ReadAttr( wghtnode, "VolFrac", fVolFrac );
1290  gTools().ReadAttr( wghtnode, "nCells", fnCells );
1291  gTools().ReadAttr( wghtnode, "nSampl", fnSampl );
1292  gTools().ReadAttr( wghtnode, "nBin", fnBin );
1293  gTools().ReadAttr( wghtnode, "EvPerBin", fEvPerBin );
1294  gTools().ReadAttr( wghtnode, "Compress", fCompress );
1295  Bool_t regr; // dummy for backwards compatib.
1296  gTools().ReadAttr( wghtnode, "DoRegression", regr );
1297  Bool_t CutNmin; // dummy for backwards compatib.
1298  gTools().ReadAttr( wghtnode, "CutNmin", CutNmin );
1299  gTools().ReadAttr( wghtnode, "Nmin", fNmin );
1300  Bool_t CutRMSmin; // dummy for backwards compatib.
1301  Float_t RMSmin; // dummy for backwards compatib.
1302  gTools().ReadAttr( wghtnode, "CutRMSmin", CutRMSmin );
1303  gTools().ReadAttr( wghtnode, "RMSmin", RMSmin );
1304  UInt_t ker = 0;
1305  gTools().ReadAttr( wghtnode, "Kernel", ker );
1306  fKernel = UIntToKernel(ker);
1307  UInt_t ts = 0;
1308  gTools().ReadAttr( wghtnode, "TargetSelection", ts );
1310  if (gTools().HasAttr(wghtnode, "FillFoamWithOrigWeights"))
1311  gTools().ReadAttr( wghtnode, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
1312  if (gTools().HasAttr(wghtnode, "UseYesNoCell"))
1313  gTools().ReadAttr( wghtnode, "UseYesNoCell", fUseYesNoCell );
1314 
1315  // clear old range [Xmin, Xmax] and prepare new range for reading
1316  fXmin.clear();
1317  fXmax.clear();
1318  UInt_t kDim = GetNvar();
1320  kDim += Data()->GetNTargets();
1321  fXmin.assign(kDim, 0);
1322  fXmax.assign(kDim, 0);
1323 
1324  // read foam range
1325  void *xmin_wrap = gTools().GetChild( wghtnode );
1326  for (UInt_t counter=0; counter<kDim; counter++) {
1327  UInt_t i=0;
1328  gTools().ReadAttr( xmin_wrap , "Index", i );
1329  if (i>=kDim)
1330  Log() << kFATAL << "dimension index out of range:" << i << Endl;
1331  gTools().ReadAttr( xmin_wrap , "Value", fXmin.at(i) );
1332  xmin_wrap = gTools().GetNextChild( xmin_wrap );
1333  }
1334 
1335  void *xmax_wrap = xmin_wrap;
1336  for (UInt_t counter=0; counter<kDim; counter++) {
1337  UInt_t i=0;
1338  gTools().ReadAttr( xmax_wrap , "Index", i );
1339  if (i>=kDim)
1340  Log() << kFATAL << "dimension index out of range:" << i << Endl;
1341  gTools().ReadAttr( xmax_wrap , "Value", fXmax.at(i) );
1342  xmax_wrap = gTools().GetNextChild( xmax_wrap );
1343  }
1344 
1345  // if foams exist, delete them
1346  DeleteFoams();
1347 
1348  // read pure foams from file
1350 
1351  // recreate the pdefoam kernel estimator
1352  if (fKernelEstimator != NULL)
1353  delete fKernelEstimator;
1355 }
1356 
1357 ////////////////////////////////////////////////////////////////////////////////
1358 /// Reads a foam with name 'foamname' from file, and returns a clone
1359 /// of the foam. The given ROOT file must be open. (The ROOT file
1360 /// will not be closed in this function.)
1361 ///
1362 /// Parameters:
1363 ///
1364 /// - file - an open ROOT file
1365 ///
1366 /// - foamname - name of foam to load from the file
1367 ///
1368 /// Returns:
1369 ///
1370 /// If a foam with name 'foamname' exists in the file, then it is
1371 /// read from the file, cloned and returned. If a foam with name
1372 /// 'foamname' does not exist in the file or the clone operation
1373 /// does not succeed, then NULL is returned.
1374 
1376 {
1377  if (file == NULL) {
1378  Log() << kWARNING << "<ReadClonedFoamFromFile>: NULL pointer given" << Endl;
1379  return NULL;
1380  }
1381 
1382  // try to load the foam from the file
1383  PDEFoam *foam = (PDEFoam*) file->Get(foamname);
1384  if (foam == NULL) {
1385  return NULL;
1386  }
1387  // try to clone the foam
1388  foam = (PDEFoam*) foam->Clone();
1389  if (foam == NULL) {
1390  Log() << kWARNING << "<ReadClonedFoamFromFile>: " << foamname
1391  << " could not be cloned!" << Endl;
1392  return NULL;
1393  }
1394 
1395  return foam;
1396 }
1397 
1398 ////////////////////////////////////////////////////////////////////////////////
1399 /// read foams from file
1400 
1402 {
1403  TString rfname( GetWeightFileName() );
1404 
1405  // replace in case of txt weight file
1406  rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );
1407 
1408  // add foam indicator to distinguish from main weight file
1409  rfname.ReplaceAll( ".xml", "_foams.root" );
1410 
1411  Log() << kINFO << "Read foams from file: " << gTools().Color("lightblue")
1412  << rfname << gTools().Color("reset") << Endl;
1413  TFile *rootFile = new TFile( rfname, "READ" );
1414  if (rootFile->IsZombie()) Log() << kFATAL << "Cannot open file \"" << rfname << "\"" << Endl;
1415 
1416  // read foams from file
1417  if (DoRegression()) {
1419  fFoam.push_back(ReadClonedFoamFromFile(rootFile, "MultiTargetRegressionFoam"));
1420  else
1421  fFoam.push_back(ReadClonedFoamFromFile(rootFile, "MonoTargetRegressionFoam"));
1422  } else {
1423  if (fSigBgSeparated) {
1424  fFoam.push_back(ReadClonedFoamFromFile(rootFile, "SignalFoam"));
1425  fFoam.push_back(ReadClonedFoamFromFile(rootFile, "BgFoam"));
1426  } else {
1427  // try to load discriminator foam
1428  PDEFoam *foam = ReadClonedFoamFromFile(rootFile, "DiscrFoam");
1429  if (foam != NULL)
1430  fFoam.push_back(foam);
1431  else {
1432  // load multiclass foams
1433  for (UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
1434  fFoam.push_back(ReadClonedFoamFromFile(rootFile, Form("MultiClassFoam%u",iClass)));
1435  }
1436  }
1437  }
1438  }
1439 
1440  // Close the root file. Note, that the foams are still present in
1441  // memory!
1442  rootFile->Close();
1443  delete rootFile;
1444 
1445  for (UInt_t i=0; i<fFoam.size(); ++i) {
1446  if (!fFoam.at(0))
1447  Log() << kFATAL << "Could not load foam!" << Endl;
1448  }
1449 }
1450 
1451 ////////////////////////////////////////////////////////////////////////////////
1452 /// convert UInt_t to EKernel (used for reading weight files)
1453 
1455 {
1456  switch(iker) {
1457  case 0: return kNone;
1458  case 1: return kGaus;
1459  case 2: return kLinN;
1460  default:
1461  Log() << kWARNING << "<UIntToKernel>: unknown kernel number: " << iker << Endl;
1462  return kNone;
1463  }
1464  return kNone;
1465 }
1466 
1467 ////////////////////////////////////////////////////////////////////////////////
1468 /// convert UInt_t to ETargetSelection (used for reading weight files)
1469 
1471 {
1472  switch(its) {
1473  case 0: return kMean;
1474  case 1: return kMpv;
1475  default:
1476  Log() << kWARNING << "<UIntToTargetSelection>: unknown method TargetSelection: " << its << Endl;
1477  return kMean;
1478  }
1479  return kMean;
1480 }
1481 
1482 ////////////////////////////////////////////////////////////////////////////////
1483 /// store the variable names in all foams
1484 
1486 {
1487  for (UInt_t ifoam=0; ifoam<fFoam.size(); ifoam++) {
1488  for (Int_t idim=0; idim<fFoam.at(ifoam)->GetTotDim(); idim++) {
1490  fFoam.at(ifoam)->AddVariableName(DataInfo().GetTargetInfo(idim-DataInfo().GetNVariables()).GetExpression().Data());
1491  else
1492  fFoam.at(ifoam)->AddVariableName(DataInfo().GetVariableInfo(idim).GetExpression().Data());
1493  }
1494  }
1495 }
1496 
1497 ////////////////////////////////////////////////////////////////////////////////
1498 /// write PDEFoam-specific classifier response
1499 /// NOT IMPLEMENTED YET!
1500 
1501 void TMVA::MethodPDEFoam::MakeClassSpecific( std::ostream& /*fout*/, const TString& /*className*/ ) const
1502 {
1503 }
1504 
1505 ////////////////////////////////////////////////////////////////////////////////
1506 /// provide help message
1507 
1509 {
1510  Log() << Endl;
1511  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1512  Log() << Endl;
1513  Log() << "PDE-Foam is a variation of the PDE-RS method using a self-adapting" << Endl;
1514  Log() << "binning method to divide the multi-dimensional variable space into a" << Endl;
1515  Log() << "finite number of hyper-rectangles (cells). The binning algorithm " << Endl;
1516  Log() << "adjusts the size and position of a predefined number of cells such" << Endl;
1517  Log() << "that the variance of the signal and background densities inside the " << Endl;
1518  Log() << "cells reaches a minimum" << Endl;
1519  Log() << Endl;
1520  Log() << gTools().Color("bold") << "--- Use of booking options:" << gTools().Color("reset") << Endl;
1521  Log() << Endl;
1522  Log() << "The PDEFoam classifier supports two different algorithms: " << Endl;
1523  Log() << Endl;
1524  Log() << " (1) Create one foam, which stores the signal over background" << Endl;
1525  Log() << " probability density. During foam buildup the variance of the" << Endl;
1526  Log() << " discriminant inside the cells is minimised." << Endl;
1527  Log() << Endl;
1528  Log() << " Booking option: SigBgSeparated=F" << Endl;
1529  Log() << Endl;
1530  Log() << " (2) Create two separate foams, one for the signal events and one for" << Endl;
1531  Log() << " background events. During foam buildup the variance of the" << Endl;
1532  Log() << " event density inside the cells is minimised separately for" << Endl;
1533  Log() << " signal and background." << Endl;
1534  Log() << Endl;
1535  Log() << " Booking option: SigBgSeparated=T" << Endl;
1536  Log() << Endl;
1537  Log() << "The following options can be set (the listed values are found to be a" << Endl;
1538  Log() << "good starting point for most applications):" << Endl;
1539  Log() << Endl;
1540  Log() << " SigBgSeparate False Separate Signal and Background" << Endl;
1541  Log() << " TailCut 0.001 Fraction of outlier events that excluded" << Endl;
1542  Log() << " from the foam in each dimension " << Endl;
1543  Log() << " VolFrac 0.0666 Volume fraction (used for density calculation" << Endl;
1544  Log() << " during foam build-up) " << Endl;
1545  Log() << " nActiveCells 500 Maximal number of active cells in final foam " << Endl;
1546  Log() << " nSampl 2000 Number of MC events per cell in foam build-up " << Endl;
1547  Log() << " nBin 5 Number of bins used in foam build-up " << Endl;
1548  Log() << " Nmin 100 Number of events in cell required to split cell" << Endl;
1549  Log() << " Kernel None Kernel type used (possible valuses are: None," << Endl;
1550  Log() << " Gauss)" << Endl;
1551  Log() << " Compress True Compress foam output file " << Endl;
1552  Log() << Endl;
1553  Log() << " Additional regression options:" << Endl;
1554  Log() << Endl;
1555  Log() << "MultiTargetRegression False Do regression with multiple targets " << Endl;
1556  Log() << " TargetSelection Mean Target selection method (possible valuses are: " << Endl;
1557  Log() << " Mean, Mpv)" << Endl;
1558  Log() << Endl;
1559  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
1560  Log() << Endl;
1561  Log() << "The performance of the two implementations was found to be similar for" << Endl;
1562  Log() << "most examples studied. For the same number of cells per foam, the two-" << Endl;
1563  Log() << "foam option approximately doubles the amount of computer memory needed" << Endl;
1564  Log() << "during classification. For special cases where the event-density" << Endl;
1565  Log() << "distribution of signal and background events is very different, the" << Endl;
1566  Log() << "two-foam option was found to perform significantly better than the" << Endl;
1567  Log() << "option with only one foam." << Endl;
1568  Log() << Endl;
1569  Log() << "In order to gain better classification performance we recommend to set" << Endl;
1570  Log() << "the parameter \"nActiveCells\" to a high value." << Endl;
1571  Log() << Endl;
1572  Log() << "The parameter \"VolFrac\" specifies the size of the sampling volume" << Endl;
1573  Log() << "during foam buildup and should be tuned in order to achieve optimal" << Endl;
1574  Log() << "performance. A larger box leads to a reduced statistical uncertainty" << Endl;
1575  Log() << "for small training samples and to smoother sampling. A smaller box on" << Endl;
1576  Log() << "the other hand increases the sensitivity to statistical fluctuations" << Endl;
1577  Log() << "in the training samples, but for sufficiently large training samples" << Endl;
1578  Log() << "it will result in a more precise local estimate of the sampled" << Endl;
1579  Log() << "density. In general, higher dimensional problems require larger box" << Endl;
1580  Log() << "sizes, due to the reduced average number of events per box volume. The" << Endl;
1581  Log() << "default value of 0.0666 was optimised for an example with 5" << Endl;
1582  Log() << "observables and training samples of the order of 50000 signal and" << Endl;
1583  Log() << "background events each." << Endl;
1584  Log() << Endl;
1585  Log() << "Furthermore kernel weighting can be activated, which will lead to an" << Endl;
1586  Log() << "additional performance improvement. Note that Gauss weighting will" << Endl;
1587  Log() << "significantly increase the response time of the method. LinNeighbors" << Endl;
1588  Log() << "weighting performs a linear interpolation with direct neighbor cells" << Endl;
1589  Log() << "for each dimension and is much faster than Gauss weighting." << Endl;
1590  Log() << Endl;
1591  Log() << "The classification results were found to be rather insensitive to the" << Endl;
1592  Log() << "values of the parameters \"nSamples\" and \"nBin\"." << Endl;
1593 }
void Train(void)
Train PDE-Foam depending on the set options.
Config & gConfig()
Definition: Config.cxx:43
PDEFoamCell * GetDau1() const
Definition: PDEFoamCell.h:101
std::vector< Float_t > fXmax
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3125
UInt_t GetNVariables() const
Definition: DataSetInfo.h:128
float xmin
Definition: THbookFile.cxx:93
virtual void Reset()
reset MethodPDEFoam:
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
long long Long64_t
Definition: RtypesCore.h:69
Bool_t fFillFoamWithOrigWeights
UInt_t KernelToUInt(EKernel ker) const
void GetNCuts(PDEFoamCell *cell, std::vector< UInt_t > &nCuts)
Fill in &#39;nCuts&#39; the number of cuts made in every foam dimension, starting at the root cell &#39;cell&#39;...
float Float_t
Definition: RtypesCore.h:53
PDEFoam * InitFoam(TString, EFoamType, UInt_t cls=0)
Create a new PDEFoam, set the PDEFoam options (nCells, nBin, Xmin, Xmax, etc.) and initialize the PDE...
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
UInt_t GetNvar() const
Definition: MethodBase.h:340
void PrintCoefficients(void)
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:302
MsgLogger & Log() const
Definition: Configurable.h:128
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:50
EAnalysisType
Definition: Types.h:129
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:225
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
Basic string class.
Definition: TString.h:137
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:575
void SetXmin(Int_t idim, Double_t wmin)
set lower foam bound in dimension idim
Definition: PDEFoam.cxx:265
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:390
int Int_t
Definition: RtypesCore.h:41
void TrainUnifiedClassification(void)
Create only one unified foam (fFoam[0]) whose cells contain the average discriminator (N_sig)/(N_sig ...
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
void FillVariableNamesToFoam() const
store the variable names in all foams
UInt_t GetNClasses() const
Definition: DataSetInfo.h:154
UInt_t GetNTargets() const
Definition: MethodBase.h:342
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8262
void box(Int_t pat, Double_t x1, Double_t y1, Double_t x2, Double_t y2)
Definition: fillpatterns.C:1
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:309
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
void GetHelpMessage() const
provide help message
const TString & GetInputLabel(Int_t i) const
Definition: MethodBase.h:346
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:443
UInt_t fSignalClass
Definition: MethodBase.h:683
Tools & gTools()
Definition: Tools.cxx:79
const TString & GetNormalization() const
Definition: DataSetInfo.h:132
void ReadWeightsFromStream(std::istream &i)
read options and internal parameters
virtual ~MethodPDEFoam(void)
destructor
const Event * GetEvent() const
Definition: MethodBase.h:745
DataSet * Data() const
Definition: MethodBase.h:405
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
DataSetInfo & DataInfo() const
Definition: MethodBase.h:406
Bool_t DoRegression() const
Definition: MethodBase.h:434
EFoamType
Definition: PDEFoam.h:74
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:76
void SetVal(UInt_t ivar, Float_t val)
set variable ivar to val
Definition: Event.cxx:337
void DeclareOptions()
Declare MethodPDEFoam options.
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:378
std::vector< Float_t > fXmin
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:93
void SetXminXmax(TMVA::PDEFoam *)
Set Xmin, Xmax for every dimension in the given pdefoam object.
PDEFoam * ReadClonedFoamFromFile(TFile *, const TString &)
Reads a foam with name &#39;foamname&#39; from file, and returns a clone of the foam.
void SetMaxDepth(UInt_t maxdepth)
Definition: PDEFoam.h:229
std::vector< Float_t > & GetTargets()
Definition: Event.h:105
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:413
void SetnSampl(Long_t nSampl)
Definition: PDEFoam.h:212
#define None
Definition: TGWin32.h:59
void SetNmin(UInt_t val)
Definition: PDEFoam.h:227
Bool_t DoMulticlass() const
Definition: MethodBase.h:435
void AddWeightsXMLTo(void *parent) const
create XML output of PDEFoam method variables
Int_t GetBest() const
Definition: PDEFoamCell.h:84
void SetXmax(Int_t idim, Double_t wmax)
set upper foam bound in dimension idim
Definition: PDEFoam.cxx:276
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:104
Double_t CalculateMVAError()
Calculate the error on the Mva value.
void Init(void)
default initialization called by all constructors
const char * GetName() const
Definition: MethodBase.h:330
void CalcXminXmax()
Determine foam range [fXmin, fXmax] for all dimensions, such that a fraction of &#39;fFrac&#39; events lie ou...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
PDEFoam can handle classification with multiple classes and regression with one or more regression-ta...
MethodPDEFoam(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="PDEFoam")
init PDEFoam objects
Double_t Mean(Long64_t n, const T *a, const Double_t *w=0)
Definition: TMath.h:811
PDEFoamKernelBase * fKernelEstimator
EKernel UIntToKernel(UInt_t iker)
convert UInt_t to EKernel (used for reading weight files)
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
const Handle_t kNone
Definition: GuiTypes.h:89
ETargetSelection UIntToTargetSelection(UInt_t its)
convert UInt_t to ETargetSelection (used for reading weight files)
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Return Mva-Value.
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:356
void TrainMultiTargetRegression(void)
Training one (multi target regression) foam, whose cells contain the average event density...
UInt_t TargetSelectionToUInt(ETargetSelection ts) const
void SetDensity(PDEFoamDensityBase *dens)
Definition: PDEFoam.h:216
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:296
float xmax
Definition: THbookFile.cxx:93
void SetDim(Int_t kDim)
Sets dimension of cubical space.
Definition: PDEFoam.cxx:250
void MakeClassSpecific(std::ostream &, const TString &) const
write PDEFoam-specific classifier response NOT IMPLEMENTED YET!
MsgLogger & Log() const
Definition: PDEFoam.h:262
TString GetWeightFileName() const
retrieve weight file name
void TrainSeparatedClassification(void)
Creation of 2 separated foams: one for signal events, one for backgound events.
void WriteFoamsToFile() const
Write PDEFoams to file.
UInt_t GetNVariables() const
Definition: MethodBase.h:341
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:233
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:680
#define ClassImp(name)
Definition: Rtypes.h:279
Bool_t IsZombie() const
Definition: TObject.h:120
void ReadWeightsFromXML(void *wghtnode)
read PDEFoam variables from xml weight file
double Double_t
Definition: RtypesCore.h:55
void TrainMultiClassification()
Create one unified foam (see TrainUnifiedClassification()) for each class, where the cells of foam i ...
void SetnCells(Long_t nCells)
Definition: PDEFoam.h:211
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:592
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:435
int type
Definition: TGX11.cxx:120
std::vector< PDEFoam * > fFoam
PDEFoamCell * GetDau0() const
Definition: PDEFoamCell.h:100
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
void AddPreDefVal(const T &)
Definition: Configurable.h:174
void ExitFromTraining()
Definition: MethodBase.h:458
PDEFoamKernelBase * CreatePDEFoamKernel()
create a pdefoam kernel estimator, depending on the current value of fKernel
void TrainMonoTargetRegression(void)
Training one (mono target regression) foam, whose cells contain the average 0th target.
void Initialize()
Definition: PDEFoam.h:195
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TObject.cxx:204
const std::vector< Float_t > & GetMulticlassValues()
Get the multiclass MVA response for the PDEFoam classifier.
#define REGISTER_METHOD(CLASS)
for example
const Ranking * CreateRanking()
Compute ranking of input variables from the number of cuts made in each PDEFoam dimension.
Abstract ClassifierFactory template that handles arbitrary types.
Ranking * fRanking
Definition: MethodBase.h:581
void DeleteFoams()
Deletes all trained foams.
std::vector< Float_t > & GetValues()
Definition: Event.h:96
Int_t GetStat() const
Definition: PDEFoamCell.h:97
Definition: file.py:1
virtual void AddRank(const Rank &rank)
Add a new rank take ownership of it.
Definition: Ranking.cxx:86
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:590
ETargetSelection fTargetSelection
Double_t GetOriginalWeight() const
Definition: Event.h:87
void SetnBin(Int_t nBin)
Definition: PDEFoam.h:213
void SetEvPerBin(Int_t EvPerBin)
Definition: PDEFoam.h:214
#define NULL
Definition: Rtypes.h:82
EDTSeparation fDTSeparation
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:591
virtual const std::vector< Float_t > & GetRegressionValues()
Return regression values for both multi- and mono-target regression.
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
void ProcessOptions()
process user options
double exp(double)
const Bool_t kTRUE
Definition: Rtypes.h:91
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:233
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:432
void ReadFoamsFromFile()
read foams from file
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:360
virtual void Close(Option_t *option="")
Close a file.
Definition: TFile.cxx:904
const char * Data() const
Definition: TString.h:349