Logo ROOT   6.14/05
Reference Guide
DataSet.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSet *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17  * *
18  * Copyright (c) 2006: *
19  * CERN, Switzerland *
20  * MPI-K Heidelberg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 /*! \class TMVA::DataSet
28 \ingroup TMVA
29 
30 Class that contains all the data information
31 
32 */
33 
34 #include <vector>
35 #include <algorithm>
36 #include <cstdlib>
37 #include <stdexcept>
38 #include <algorithm>
39 
40 #include "TMVA/DataSetInfo.h"
41 #include "TMVA/DataSet.h"
42 #include "TMVA/Event.h"
43 #include "TMVA/MsgLogger.h"
44 #include "TMVA/ResultsRegression.h"
46 #include "TMVA/ResultsMulticlass.h"
47 #include "TMVA/Configurable.h"
48 
49 #include "TMVA/Types.h"
50 #include "TMVA/Results.h"
51 #include "TMVA/VariableInfo.h"
52 
53 #include "TRandom3.h"
54 
55 ////////////////////////////////////////////////////////////////////////////////
56 /// constructor
57 
59  :TNamed(dsi.GetName(),"DataSet"),
60  fdsi(&dsi),
61  fEventCollection(4),
62  fCurrentTreeIdx(0),
63  fCurrentEventIdx(0),
64  fHasNegativeEventWeights(kFALSE),
65  fLogger( new MsgLogger(TString(TString("Dataset:")+dsi.GetName()).Data()) ),
66  fTrainingBlockSize(0)
67 {
68 
69  fClassEvents.resize(4);
70  fBlockBelongToTraining.reserve(10);
71  fBlockBelongToTraining.push_back(kTRUE);
72 
73  // sampling
74  fSamplingRandom = 0;
75 
76  Int_t treeNum = 2;
77  fSampling.resize( treeNum );
78  fSamplingNEvents.resize( treeNum );
79  fSamplingWeight.resize(treeNum);
80 
81  for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
82  fSampling.at(treeIdx) = kFALSE;
83  fSamplingNEvents.at(treeIdx) = 0;
84  fSamplingWeight.at(treeIdx) = 1.0;
85  }
86 }
87 
88 ////////////////////////////////////////////////////////////////////////////////
89 /// constructor
90 
92  :fdsi(new DataSetInfo(GetName())),
94  fCurrentTreeIdx(0),
97  fLogger( new MsgLogger(TString(TString("Dataset:")+GetName()).Data()) ),
99 {
100 
101  fClassEvents.resize(4);
102  fBlockBelongToTraining.reserve(10);
103  fBlockBelongToTraining.push_back(kTRUE);
104 
105  // sampling
106  fSamplingRandom = 0;
107 
108  Int_t treeNum = 2;
109  fSampling.resize( treeNum );
110  fSamplingNEvents.resize( treeNum );
111  fSamplingWeight.resize(treeNum);
112 
113  for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
114  fSampling.at(treeIdx) = kFALSE;
115  fSamplingNEvents.at(treeIdx) = 0;
116  fSamplingWeight.at(treeIdx) = 1.0;
117  }
118 }
119 
120 ////////////////////////////////////////////////////////////////////////////////
121 /// destructor
122 
124 {
125  // delete event collection
126  Bool_t deleteEvents=true; // dataset owns the events /JS
127  DestroyCollection( Types::kTraining, deleteEvents );
128  DestroyCollection( Types::kTesting, deleteEvents );
129 
130  fBlockBelongToTraining.clear();
131  // delete results
132  for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); ++it) {
133  for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); ++itMap) {
134  delete itMap->second;
135  }
136  }
137 
138  // delete sampling
139  if (fSamplingRandom != 0 ) delete fSamplingRandom;
140 
141 
142  // need also to delete fEventCollections[2] and [3], not sure if they are used
143  DestroyCollection( Types::kValidation, deleteEvents );
145 
146  delete fLogger;
147 }
148 
149 ////////////////////////////////////////////////////////////////////////////////
150 
152 {
153  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
154  if (fClassEvents.at( type ).size() < classNumber+1) fClassEvents.at( type ).resize( classNumber+1 );
155  fClassEvents.at( type ).at( classNumber ) += 1;
156 }
157 
158 ////////////////////////////////////////////////////////////////////////////////
159 
161 {
162  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
163  fClassEvents.at( type ).clear();
164 }
165 
166 ////////////////////////////////////////////////////////////////////////////////
167 
169 {
170  try {
171  return fClassEvents.at(type).at(classNumber);
172  }
173  catch (std::out_of_range) {
174  ClassInfo* ci = fdsi->GetClassInfo( classNumber );
175  Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "No " << (type==0?"training":(type==1?"testing":"_unknown_type_"))
176  << " events for class " << (ci==NULL?"_no_name_known_":ci->GetName()) << " (index # "<<classNumber<<")"
177  << " available. Check if all class names are spelled correctly and if events are"
178  << " passing the selection cuts." << Endl;
179  }
180  catch (...) {
181  Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" << Endl;
182  }
183  return 0;
184 }
185 
186 ////////////////////////////////////////////////////////////////////////////////
187 /// destroys the event collection (events + vector)
188 
190 {
191  UInt_t i = TreeIndex(type);
192  if (i>=fEventCollection.size() || fEventCollection[i].size()==0) return;
193  if (deleteEvents) {
194 
195  for (UInt_t j=0; j<fEventCollection[i].size(); j++) delete fEventCollection[i][j];
196  }
197  fEventCollection[i].clear();
198 }
199 
200 ////////////////////////////////////////////////////////////////////////////////
201 
203 {
206  return ((fEventCollection.at(fCurrentTreeIdx))).at(iEvt);
207  }
208  else {
210  }
211 }
212 
213 ////////////////////////////////////////////////////////////////////////////////
214 /// access the number of variables through the datasetinfo
215 
217 {
218  return fdsi->GetNVariables();
219 }
220 
221 ////////////////////////////////////////////////////////////////////////////////
222 /// access the number of targets through the datasetinfo
223 
225 {
226  return fdsi->GetNTargets();
227 }
228 
229 ////////////////////////////////////////////////////////////////////////////////
230 /// access the number of targets through the datasetinfo
231 
233 {
234  return fdsi->GetNSpectators();
235 }
236 
237 ////////////////////////////////////////////////////////////////////////////////
238 /// add event to event list
239 /// after which the event is owned by the dataset
240 
242 {
243  fEventCollection.at(Int_t(type)).push_back(ev);
245 }
246 
247 ////////////////////////////////////////////////////////////////////////////////
248 /// Sets the event collection (by DataSetFactory)
249 
250 void TMVA::DataSet::SetEventCollection(std::vector<TMVA::Event*>* events, Types::ETreeType type, Bool_t deleteEvents)
251 {
252  DestroyCollection(type,deleteEvents);
253 
254  const Int_t t = TreeIndex(type);
255  ClearNClassEvents( type );
256  //pointer to std::vector is not serializable,
257  fEventCollection.at(t) = *events;
258  for (std::vector<Event*>::iterator it = fEventCollection.at(t).begin(); it < fEventCollection.at(t).end(); ++it) {
259  IncrementNClassEvents( t, (*it)->GetClass() );
260  }
261 }
262 
263 ////////////////////////////////////////////////////////////////////////////////
264 
267  Types::EAnalysisType analysistype )
268 {
269  UInt_t t = TreeIndex(type);
270  if (t<fResults.size()) {
271  const std::map< TString, Results* >& resultsForType = fResults[t];
272  std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
273  if (it!=resultsForType.end()) {
274  //Log() << kINFO << " GetResults("<<info<<") returns existing result." << Endl;
275  return it->second;
276  }
277  }
278  else {
279  fResults.resize(t+1);
280  }
281 
282  // nothing found
283 
284  Results * newresults = 0;
285  switch(analysistype) {
287  newresults = new ResultsClassification(fdsi,resultsName);
288  break;
289  case Types::kRegression:
290  newresults = new ResultsRegression(fdsi,resultsName);
291  break;
292  case Types::kMulticlass:
293  newresults = new ResultsMulticlass(fdsi,resultsName);
294  break;
296  newresults = new ResultsClassification(fdsi,resultsName);
297  break;
299  //Log() << kINFO << " GetResults("<<info<<") can't create new one." << Endl;
300  return 0;
301  break;
302  }
303 
304  newresults->SetTreeType( type );
305  fResults[t][resultsName] = newresults;
306 
307  //Log() << kINFO << " GetResults("<<info<<") builds new result." << Endl;
308  return newresults;
309 }
310 ////////////////////////////////////////////////////////////////////////////////
311 /// delete the results stored for this particular Method instance.
312 /// (here apparently called resultsName instead of MethodTitle
313 /// Tree type (Training, testing etc..)
314 /// Analysis Type (Classification, Multiclass, Regression etc..)
315 
316 void TMVA::DataSet::DeleteResults( const TString & resultsName,
318  Types::EAnalysisType /* analysistype */ )
319 {
320  if (fResults.empty()) return;
321 
322  if (UInt_t(type) > fResults.size()){
323  Log()<<kFATAL<< Form("Dataset[%s] : ",fdsi->GetName()) << "you asked for an Treetype (training/testing/...)"
324  << " whose index " << type << " does not exist " << Endl;
325  }
326  std::map< TString, Results* >& resultsForType = fResults[UInt_t(type)];
327  std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
328  if (it!=resultsForType.end()) {
329  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << " Delete Results previous existing result:" << resultsName
330  << " of type " << type << Endl;
331  delete it->second;
332  resultsForType.erase(it->first);
333  }
334  else {
335  Log() << kINFO << Form("Dataset[%s] : ",fdsi->GetName()) << "could not fine Result class of " << resultsName
336  << " of type " << type << " which I should have deleted" << Endl;
337  }
338 }
339 
340 ////////////////////////////////////////////////////////////////////////////////
341 /// Deletes all results currently in the dataset.
342 ///
344  Types::EAnalysisType /* analysistype */ )
345 {
346  if (fResults.empty()) return;
347 
348  if (UInt_t(type) > fResults.size()){
349  Log()<<kFATAL<< Form("Dataset[%s] : ",fdsi->GetName()) << "you asked for an Treetype (training/testing/...)"
350  << " whose index " << type << " does not exist " << Endl;
351  }
352 
353  std::map<TString, Results *> & resultsForType = fResults[UInt_t(type)];
354 
355  for (auto && it : resultsForType) {
356  auto & resultsName = it.first;
357 
358  Log() << kDEBUG << Form("Dataset[%s] : ", fdsi->GetName())
359  << " DeleteAllResults previous existing result: "
360  << resultsName << " of type " << type << Endl;
361 
362  delete it.second;
363  }
364 
365  resultsForType.clear();
366 }
367 
368 ////////////////////////////////////////////////////////////////////////////////
369 /// divide training set
370 
372 {
374  // not changing anything ??
375  if (fBlockBelongToTraining.size() == blockNum) return;
376  // storing the original training vector
377  if (fBlockBelongToTraining.size() == 1) {
378  if (fEventCollection[tOrg].size() == 0)
379  fEventCollection[tOrg].resize(fEventCollection[tTrn].size());
380  fEventCollection[tOrg].clear();
381  for (UInt_t i=0; i<fEventCollection[tTrn].size(); i++)
382  fEventCollection[tOrg].push_back(fEventCollection[tTrn][i]);
383  fClassEvents[tOrg] = fClassEvents[tTrn];
384  }
385  //reseting the event division vector
386  fBlockBelongToTraining.clear();
387  for (UInt_t i=0 ; i < blockNum ; i++) fBlockBelongToTraining.push_back(kTRUE);
388 
390 }
391 
392 ////////////////////////////////////////////////////////////////////////////////
393 /// apply division of data set
394 
396 {
398  fEventCollection[tTrn].clear();
399  if (fEventCollection[tVld].size()==0)
400  fEventCollection[tVld].resize(fEventCollection[tOrg].size());
401  fEventCollection[tVld].clear();
402 
403  //creating the new events collections, notice that the events that can't be evenly divided belong to the last event
404  for (UInt_t i=0; i<fEventCollection[tOrg].size(); i++) {
406  fEventCollection[tTrn].push_back(fEventCollection[tOrg][i]);
407  else
408  fEventCollection[tVld].push_back(fEventCollection[tOrg][i]);
409  }
410 }
411 
412 ////////////////////////////////////////////////////////////////////////////////
413 /// move training block
414 
416 {
417  if (dest == Types::kValidation)
418  fBlockBelongToTraining[blockInd]=kFALSE;
419  else
420  fBlockBelongToTraining[blockInd]=kTRUE;
421  if (applyChanges) ApplyTrainingSetDivision();
422 }
423 
424 ////////////////////////////////////////////////////////////////////////////////
425 /// return number of signal test events in dataset
426 
428 {
430 }
431 
432 ////////////////////////////////////////////////////////////////////////////////
433 /// return number of background test events in dataset
434 
436 {
437  return GetNClassEvents(Types::kTesting, fdsi->GetClassInfo("Background")->GetNumber() );
438 }
439 
440 ////////////////////////////////////////////////////////////////////////////////
441 /// return number of signal training events in dataset
442 
444 {
446 }
447 
448 ////////////////////////////////////////////////////////////////////////////////
449 /// return number of background training events in dataset
450 
452 {
453  return GetNClassEvents(Types::kTraining, fdsi->GetClassInfo("Background")->GetNumber() );
454 }
455 
456 ////////////////////////////////////////////////////////////////////////////////
457 /// initialize random or importance sampling
458 
459 void TMVA::DataSet::InitSampling( Float_t fraction, Float_t weight, UInt_t seed )
460 {
461  // add a random generator if not yet present
462  if (fSamplingRandom == 0 ) fSamplingRandom = new TRandom3( seed );
463 
464  // first, clear the lists
465  std::vector< std::pair< Float_t, Long64_t >* > evtList;
466 
467  Int_t treeIdx = TreeIndex( GetCurrentType() );
468 
469  if (fSamplingEventList.size() < UInt_t(treeIdx+1) ) fSamplingEventList.resize(treeIdx+1);
470  if (fSamplingSelected.size() < UInt_t(treeIdx+1) ) fSamplingSelected.resize(treeIdx+1);
471 
472  fSamplingEventList.at(treeIdx).clear();
473  fSamplingSelected.at(treeIdx).clear();
474 
475  if (fSampling.size() < UInt_t(treeIdx+1) ) fSampling.resize(treeIdx+1);
476  if (fSamplingNEvents.size() < UInt_t(treeIdx+1) ) fSamplingNEvents.resize(treeIdx+1);
477  if (fSamplingWeight.size() < UInt_t(treeIdx+1) ) fSamplingWeight.resize(treeIdx+1);
478 
479  if (fraction > 0.999999 || fraction < 0.0000001) {
480  fSampling.at( treeIdx ) = false;
481  fSamplingNEvents.at( treeIdx ) = 0;
482  fSamplingWeight.at( treeIdx ) = 1.0;
483  return;
484  }
485 
486  // for the initialization, the sampling has to be turned off, afterwards we will turn it on
487  fSampling.at( treeIdx ) = false;
488 
489  fSamplingNEvents.at( treeIdx ) = Int_t(fraction*GetNEvents());
490  fSamplingWeight.at( treeIdx ) = weight;
491 
492  Long64_t nEvts = GetNEvents();
493  fSamplingEventList.at( treeIdx ).reserve( nEvts );
494  fSamplingSelected.at( treeIdx ).reserve( fSamplingNEvents.at(treeIdx) );
495  for (Long64_t ievt=0; ievt<nEvts; ievt++) {
496  std::pair<Float_t,Long64_t> p(1.0,ievt);
497  fSamplingEventList.at( treeIdx ).push_back( p );
498  }
499 
500  // now turn sampling on
501  fSampling.at( treeIdx ) = true;
502 }
503 
504 
505 ////////////////////////////////////////////////////////////////////////////////
506 /// create an event sampling (random or importance sampling)
507 
509 {
510  Int_t treeIdx = TreeIndex( GetCurrentType() );
511 
512  if (!fSampling.at(treeIdx) ) return;
513 
514  if (fSamplingRandom == 0 )
515  Log() << kFATAL<< Form("Dataset[%s] : ",fdsi->GetName())
516  << "no random generator present for creating a random/importance sampling (initialized?)" << Endl;
517 
518  // delete the previous selection
519  fSamplingSelected.at(treeIdx).clear();
520 
521  // create a temporary event-list
522  std::vector< std::pair< Float_t, Long64_t > > evtList;
523  std::vector< std::pair< Float_t, Long64_t > >::iterator evtListIt;
524 
525  // some variables
526  Float_t sumWeights = 0;
527 
528  // make a copy of the event-list
529  evtList.assign( fSamplingEventList.at(treeIdx).begin(), fSamplingEventList.at(treeIdx).end() );
530 
531  // sum up all the weights (internal weights for importance sampling)
532  for (evtListIt = evtList.begin(); evtListIt != evtList.end(); ++evtListIt) {
533  sumWeights += (*evtListIt).first;
534  }
535  evtListIt = evtList.begin();
536 
537  // random numbers
538  std::vector< Float_t > rnds;
539  rnds.reserve(fSamplingNEvents.at(treeIdx));
540 
541  Float_t pos = 0;
542  for (Int_t i = 0; i < fSamplingNEvents.at(treeIdx); i++) {
543  pos = fSamplingRandom->Rndm()*sumWeights;
544  rnds.push_back( pos );
545  }
546 
547  // sort the random numbers
548  std::sort(rnds.begin(),rnds.end());
549 
550  // select the events according to the random numbers
551  std::vector< Float_t >::iterator rndsIt = rnds.begin();
552  Float_t runningSum = 0.000000001;
553  for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
554  runningSum += (*evtListIt).first;
555  if (runningSum >= (*rndsIt)) {
556  fSamplingSelected.at(treeIdx).push_back( (*evtListIt) );
557  evtListIt = evtList.erase( evtListIt );
558 
559  ++rndsIt;
560  if (rndsIt == rnds.end() ) break;
561  }
562  else {
563  ++evtListIt;
564  }
565  }
566 }
567 
568 ////////////////////////////////////////////////////////////////////////////////
569 /// increase the importance sampling weight of the event
570 /// when not successful and decrease it when successful
571 
572 void TMVA::DataSet::EventResult( Bool_t successful, Long64_t evtNumber )
573 {
574 
575  if (!fSampling.at(fCurrentTreeIdx)) return;
576  if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999) return;
577 
578  Long64_t start = 0;
579  Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
580  if (evtNumber >= 0) {
581  start = evtNumber;
582  stop = evtNumber;
583  }
584  for ( Long64_t iEvt = start; iEvt <= stop; iEvt++ ){
585  if (Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) < iEvt) {
586  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "event number (" << iEvt
587  << ") larger than number of sampled events ("
588  << fSamplingEventList.at(fCurrentTreeIdx).size() << " of tree " << fCurrentTreeIdx << ")" << Endl;
589  return;
590  }
591  Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first;
592  if (!successful) {
593  // weight /= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
594  weight /= fSamplingWeight.at(fCurrentTreeIdx);
595  if (weight > 1.0 ) weight = 1.0;
596  }
597  else {
598  // weight *= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
599  weight *= fSamplingWeight.at(fCurrentTreeIdx);
600  }
601  fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first = weight;
602  }
603 }
604 
605 ////////////////////////////////////////////////////////////////////////////////
606 /// create the test/trainings tree with all the variables, the weights, the
607 /// classes, the targets, the spectators, the MVA outputs
608 
610 {
611  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "GetTree(" << ( type==Types::kTraining ? "training" : "testing" ) << ")" << Endl;
612 
613  // the dataset does not hold the tree, this function returns a new tree every time it is called
614 
615  if (type!=Types::kTraining && type!=Types::kTesting) return 0;
616 
617  Types::ETreeType savedType = GetCurrentType();
618 
619  SetCurrentType(type);
620  const UInt_t t = TreeIndex(type);
621  if (fResults.size() <= t) {
622  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "No results for treetype " << ( type==Types::kTraining ? "training" : "testing" )
623  << " found. Size=" << fResults.size() << Endl;
624  }
625 
626  // return number of background training events in dataset
627  TString treeName( (type == Types::kTraining ? "TrainTree" : "TestTree" ) );
628  TTree *tree = new TTree(treeName,treeName);
629 
630  Float_t *varVals = new Float_t[fdsi->GetNVariables()];
631  Float_t *tgtVals = new Float_t[fdsi->GetNTargets()];
632  Float_t *visVals = new Float_t[fdsi->GetNSpectators()];
633 
634  UInt_t cls;
635  Float_t weight;
636  // TObjString *className = new TObjString();
637  char className[40];
638 
639 
640  //Float_t metVals[fResults.at(t).size()][Int_t(fdsi->GetNTargets()+1)];
641  // replace by: [Joerg]
642  Float_t **metVals = new Float_t*[fResults.at(t).size()];
643  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
644  metVals[i] = new Float_t[fdsi->GetNTargets()+fdsi->GetNClasses()];
645 
646  // create branches for event-variables
647  tree->Branch( "classID", &cls, "classID/I" );
648  tree->Branch( "className", className, "className/C" );
649 
650  // create all branches for the variables
651  Int_t n = 0;
652  for (std::vector<VariableInfo>::const_iterator itVars = fdsi->GetVariableInfos().begin();
653  itVars != fdsi->GetVariableInfos().end(); ++itVars) {
654 
655  // has to be changed to take care of types different than float: TODO
656  tree->Branch( (*itVars).GetInternalName(), &varVals[n], (*itVars).GetInternalName()+TString("/F") );
657  n++;
658  }
659  // create the branches for the targets
660  n = 0;
661  for (std::vector<VariableInfo>::const_iterator itTgts = fdsi->GetTargetInfos().begin();
662  itTgts != fdsi->GetTargetInfos().end(); ++itTgts) {
663  // has to be changed to take care of types different than float: TODO
664  tree->Branch( (*itTgts).GetInternalName(), &tgtVals[n], (*itTgts).GetInternalName()+TString("/F") );
665  n++;
666  }
667  // create the branches for the spectator variables
668  n = 0;
669  for (std::vector<VariableInfo>::const_iterator itVis = fdsi->GetSpectatorInfos().begin();
670  itVis != fdsi->GetSpectatorInfos().end(); ++itVis) {
671  // has to be changed to take care of types different than float: TODO
672  tree->Branch( (*itVis).GetInternalName(), &visVals[n], (*itVis).GetInternalName()+TString("/F") );
673  n++;
674  }
675 
676  tree->Branch( "weight", &weight, "weight/F" );
677 
678  // create all the branches for the results
679  n = 0;
680  for (std::map< TString, Results* >::iterator itMethod = fResults.at(t).begin();
681  itMethod != fResults.at(t).end(); ++itMethod) {
682 
683 
684  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "analysis type: " << (itMethod->second->GetAnalysisType()==Types::kRegression ? "Regression" :
685  (itMethod->second->GetAnalysisType()==Types::kMulticlass ? "Multiclass" : "Classification" )) << Endl;
686 
687  if (itMethod->second->GetAnalysisType() == Types::kClassification) {
688  // classification
689  tree->Branch( itMethod->first, &(metVals[n][0]), itMethod->first + "/F" );
690  }
691  else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
692  // multiclass classification
693  TString leafList("");
694  for (UInt_t iCls = 0; iCls < fdsi->GetNClasses(); iCls++) {
695  if (iCls > 0) leafList.Append( ":" );
696  leafList.Append( fdsi->GetClassInfo( iCls )->GetName() );
697  leafList.Append( "/F" );
698  }
699  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
700  << leafList << " itMethod->second " << itMethod->second << Endl;
701  tree->Branch( itMethod->first, (metVals[n]), leafList );
702  }
703  else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
704  // regression
705  TString leafList("");
706  for (UInt_t iTgt = 0; iTgt < fdsi->GetNTargets(); iTgt++) {
707  if (iTgt > 0) leafList.Append( ":" );
708  leafList.Append( fdsi->GetTargetInfo( iTgt ).GetInternalName() );
709  // leafList.Append( fdsi->GetTargetInfo( iTgt ).GetLabel() );
710  leafList.Append( "/F" );
711  }
712  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
713  << leafList << " itMethod->second " << itMethod->second << Endl;
714  tree->Branch( itMethod->first, (metVals[n]), leafList );
715  }
716  else {
717  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "Unknown analysis type for result found when writing TestTree." << Endl;
718  }
719  n++;
720 
721  }
722 
723  // Sanity check, ensure all result sets have the expected number of events
724  for (auto && itMethod : fResults.at(t)) {
725  auto numEvents = GetNEvents(type);
726  auto results = itMethod.second;
727  auto resultsName = itMethod.first;
728 
729  Long64_t numEventsResults = 0;
730  auto analysisType = results->GetAnalysisType();
731  if (analysisType == Types::kClassification) {
732  numEventsResults = dynamic_cast<ResultsClassification *>(results)->GetSize();
733  } else if (analysisType == Types::kMulticlass) {
734  numEventsResults = dynamic_cast<ResultsMulticlass *>(results)->GetSize();
735  } else if (analysisType == Types::kRegression) {
736  numEventsResults = dynamic_cast<ResultsRegression *>(results)->GetSize();
737  } else {
738  Log() << kFATAL << "Unexpected analysisType." << Endl;
739  }
740 
741  if (numEventsResults != numEvents) {
742  Log() << kFATAL << "An error occurred in DataSet::GetTree. "
743  "Inconsistent size of result for result with name '"
744  << resultsName << "'."
745  << " Size is '" << std::to_string(numEventsResults)
746  << "'.'"
747  << " Expected '" << numEvents << "'." << Endl;
748  }
749  }
750 
751  // loop through all the events
752  for (Long64_t iEvt = 0; iEvt < GetNEvents( type ); iEvt++) {
753  // write the event-variables
754  const Event* ev = GetEvent( iEvt );
755  // write the classnumber and the classname
756  cls = ev->GetClass();
757  weight = ev->GetWeight();
758  strlcpy(className, fdsi->GetClassInfo( cls )->GetName(), sizeof(className));
759 
760  // write the variables, targets and spectator variables
761  for (UInt_t ivar = 0; ivar < ev->GetNVariables(); ivar++) varVals[ivar] = ev->GetValue( ivar );
762  for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) tgtVals[itgt] = ev->GetTarget( itgt );
763  for (UInt_t ivis = 0; ivis < ev->GetNSpectators(); ivis++) visVals[ivis] = ev->GetSpectator( ivis );
764 
765 
766  // loop through all the results and write the branches
767  auto iMethod = 0;
768  for (auto && itMethod : fResults.at(t)) {
769  auto & results = *itMethod.second;
770  auto analysisType = results.GetAnalysisType();
771 
772  auto const & vals = results[iEvt];
773 
774  if (analysisType == Types::kClassification) {
775  metVals[iMethod][0] = vals[0];
776  } else if (analysisType == Types::kMulticlass) {
777  for (UInt_t nCls = 0; nCls < fdsi->GetNClasses(); nCls++) {
778  Float_t val = vals.at(nCls);
779  metVals[iMethod][nCls] = val;
780  }
781  } else if (analysisType == Types::kRegression) {
782  for (UInt_t nTgts = 0; nTgts < fdsi->GetNTargets(); nTgts++) {
783  Float_t val = vals.at(nTgts);
784  metVals[iMethod][nTgts] = val;
785  }
786  }
787  ++iMethod;
788  }
789  // fill the variables into the tree
790  tree->Fill();
791  }
792 
793  Log() << kHEADER //<< Form("[%s] : ",fdsi.GetName())
794  << "Created tree '" << tree->GetName() << "' with " << tree->GetEntries() << " events" << Endl << Endl;
795 
796  SetCurrentType(savedType);
797 
798  delete[] varVals;
799  delete[] tgtVals;
800  delete[] visVals;
801 
802  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
803  delete[] metVals[i];
804  delete[] metVals;
805 
806  return tree;
807 }
808 
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:145
UInt_t GetNVariables() const
Definition: DataSetInfo.h:110
Random number generator class based on M.
Definition: TRandom3.h:27
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
long long Long64_t
Definition: RtypesCore.h:69
Class that is the base-class for a vector of result.
Long64_t fTrainingBlockSize
Definition: DataSet.h:180
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingSelected
Definition: DataSet.h:164
const TString & GetInternalName() const
Definition: VariableInfo.h:58
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Definition: DataSet.cxx:241
std::vector< VariableInfo > & GetSpectatorInfos()
Definition: DataSetInfo.h:104
virtual Double_t Rndm()
Machine independent random number generator.
Definition: TRandom3.cxx:100
float Float_t
Definition: RtypesCore.h:53
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingEventList
Definition: DataSet.h:163
DataSet()
constructor
Definition: DataSet.cxx:91
std::vector< std::vector< Event * > > fEventCollection
Definition: DataSet.h:152
void CreateSampling() const
create an event sampling (random or importance sampling)
Definition: DataSet.cxx:508
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4374
TRandom3 * fSamplingRandom
Definition: DataSet.h:165
EAnalysisType
Definition: Types.h:127
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:216
void SetTreeType(Types::ETreeType type)
Definition: Results.h:67
Basic string class.
Definition: TString.h:131
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< Char_t > fBlockBelongToTraining
Definition: DataSet.h:176
void ClearNClassEvents(Int_t type)
Definition: DataSet.cxx:160
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:451
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t TreeIndex(Types::ETreeType type) const
Definition: DataSet.h:192
UInt_t GetNSpectators() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:232
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:29
virtual ~DataSet()
destructor
Definition: DataSet.cxx:123
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:609
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:205
UInt_t GetClass() const
Definition: Event.h:81
TString & Append(const char *cs)
Definition: TString.h:559
Class that contains all the data information.
Definition: DataSetInfo.h:60
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
std::vector< VariableInfo > & GetTargetInfos()
Definition: DataSetInfo.h:99
Bool_t fHasNegativeEventWeights
Definition: DataSet.h:172
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:320
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
Definition: DataSet.cxx:415
void ApplyTrainingSetDivision()
apply division of data set
Definition: DataSet.cxx:395
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:97
UInt_t GetNTargets() const
Definition: DataSetInfo.h:111
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
std::vector< std::vector< Long64_t > > fClassEvents
Definition: DataSet.h:169
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:427
ClassInfo * GetClassInfo(Int_t clNum) const
VariableInfo & GetTargetInfo(Int_t i)
Definition: DataSetInfo.h:101
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
Definition: DataSet.cxx:316
UInt_t GetNSpectators() const
accessor to the number of spectators
Definition: Event.cxx:328
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void DivideTrainingSet(UInt_t blockNum)
divide training set
Definition: DataSet.cxx:371
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
Definition: DataSet.cxx:189
UInt_t GetNSpectators(bool all=kTRUE) const
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:435
const DataSetInfo * fdsi
Definition: DataSet.h:150
void DeleteAllResults(Types::ETreeType type, Types::EAnalysisType analysistype)
Deletes all results currently in the dataset.
Definition: DataSet.cxx:343
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:309
const Bool_t kFALSE
Definition: RtypesCore.h:88
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:151
std::vector< Char_t > fSampling
Definition: DataSet.h:160
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
Definition: DataSet.cxx:572
std::vector< Float_t > fSamplingWeight
Definition: DataSet.h:162
Long64_t fCurrentEventIdx
Definition: DataSet.h:157
UInt_t fCurrentTreeIdx
[train/test/...][method-identifier]
Definition: DataSet.h:156
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:443
MsgLogger & Log() const
message logger
Definition: DataSet.h:175
int type
Definition: TGX11.cxx:120
Class which takes the results of a multiclass classification.
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
void SetEventCollection(std::vector< Event *> *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
Definition: DataSet.cxx:250
std::vector< Int_t > fSamplingNEvents
Definition: DataSet.h:161
UInt_t GetNumber() const
Definition: ClassInfo.h:65
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:67
virtual Long64_t GetEntries() const
Definition: TTree.h:384
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:168
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1711
MsgLogger * fLogger
Definition: DataSet.h:174
#define dest(otri, vertexptr)
Definition: triangle.c:1040
Class that is the base-class for a vector of results.
Definition: Results.h:57
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:217
Definition: tree.py:1
A TTree object has a header with a name and a title.
Definition: TTree.h:70
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:224
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Definition: Event.cxx:262
const Bool_t kTRUE
Definition: RtypesCore.h:87
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
Definition: DataSet.cxx:459
const Int_t n
Definition: legend1.C:16
const Event * GetEvent() const
Definition: DataSet.cxx:202
std::vector< VariableInfo > & GetVariableInfos()
Definition: DataSetInfo.h:94
Class that is the base-class for a vector of results.
std::vector< std::map< TString, Results *> > fResults
Definition: DataSet.h:154