Logo ROOT   6.10/09
Reference Guide
DataSet.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSet *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17  * *
18  * Copyright (c) 2006: *
19  * CERN, Switzerland *
20  * MPI-K Heidelberg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 /*! \class TMVA::DataSet
28 \ingroup TMVA
29 
30 Class that contains all the data information
31 
32 */
33 
34 #include <vector>
35 #include <algorithm>
36 #include <cstdlib>
37 #include <stdexcept>
38 #include <algorithm>
39 
40 #include "TMVA/DataSetInfo.h"
41 #include "TMVA/DataSet.h"
42 #include "TMVA/Event.h"
43 #include "TMVA/MsgLogger.h"
44 #include "TMVA/ResultsRegression.h"
46 #include "TMVA/ResultsMulticlass.h"
47 #include "TMVA/Configurable.h"
48 
49 #include "TMVA/Types.h"
50 #include "TMVA/Results.h"
51 #include "TMVA/VariableInfo.h"
52 
53 #include "TRandom3.h"
54 
55 ////////////////////////////////////////////////////////////////////////////////
56 /// constructor
57 
59  :TNamed(dsi.GetName(),"DataSet"),
60  fdsi(&dsi),
61  fEventCollection(4),
62  fCurrentTreeIdx(0),
63  fCurrentEventIdx(0),
64  fHasNegativeEventWeights(kFALSE),
65  fLogger( new MsgLogger(TString(TString("Dataset:")+dsi.GetName()).Data()) ),
66  fTrainingBlockSize(0)
67 {
68 
69  fClassEvents.resize(4);
70  fBlockBelongToTraining.reserve(10);
71  fBlockBelongToTraining.push_back(kTRUE);
72 
73  // sampling
74  fSamplingRandom = 0;
75 
76  Int_t treeNum = 2;
77  fSampling.resize( treeNum );
78  fSamplingNEvents.resize( treeNum );
79  fSamplingWeight.resize(treeNum);
80 
81  for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
82  fSampling.at(treeIdx) = kFALSE;
83  fSamplingNEvents.at(treeIdx) = 0;
84  fSamplingWeight.at(treeIdx) = 1.0;
85  }
86 }
87 
88 ////////////////////////////////////////////////////////////////////////////////
89 /// constructor
90 
92  :fdsi(new DataSetInfo(GetName())),
94  fCurrentTreeIdx(0),
97  fLogger( new MsgLogger(TString(TString("Dataset:")+GetName()).Data()) ),
99 {
100 
101  fClassEvents.resize(4);
102  fBlockBelongToTraining.reserve(10);
103  fBlockBelongToTraining.push_back(kTRUE);
104 
105  // sampling
106  fSamplingRandom = 0;
107 
108  Int_t treeNum = 2;
109  fSampling.resize( treeNum );
110  fSamplingNEvents.resize( treeNum );
111  fSamplingWeight.resize(treeNum);
112 
113  for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
114  fSampling.at(treeIdx) = kFALSE;
115  fSamplingNEvents.at(treeIdx) = 0;
116  fSamplingWeight.at(treeIdx) = 1.0;
117  }
118 }
119 
120 ////////////////////////////////////////////////////////////////////////////////
121 /// destructor
122 
124 {
125  // delete event collection
126  Bool_t deleteEvents=true; // dataset owns the events /JS
127  DestroyCollection( Types::kTraining, deleteEvents );
128  DestroyCollection( Types::kTesting, deleteEvents );
129 
130  fBlockBelongToTraining.clear();
131  // delete results
132  for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); it++) {
133  for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); itMap++) {
134  delete itMap->second;
135  }
136  }
137 
138  // delete sampling
139  if (fSamplingRandom != 0 ) delete fSamplingRandom;
140 
141 
142  // need also to delete fEventCollections[2] and [3], not sure if they are used
143  DestroyCollection( Types::kValidation, deleteEvents );
145 
146  delete fLogger;
147 }
148 
149 ////////////////////////////////////////////////////////////////////////////////
150 
152 {
153  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
154  if (fClassEvents.at( type ).size() < classNumber+1) fClassEvents.at( type ).resize( classNumber+1 );
155  fClassEvents.at( type ).at( classNumber ) += 1;
156 }
157 
158 ////////////////////////////////////////////////////////////////////////////////
159 
161 {
162  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
163  fClassEvents.at( type ).clear();
164 }
165 
166 ////////////////////////////////////////////////////////////////////////////////
167 
169 {
170  try {
171  return fClassEvents.at(type).at(classNumber);
172  }
173  catch (std::out_of_range excpt) {
174  ClassInfo* ci = fdsi->GetClassInfo( classNumber );
175  Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "No " << (type==0?"training":(type==1?"testing":"_unknown_type_"))
176  << " events for class " << (ci==NULL?"_no_name_known_":ci->GetName()) << " (index # "<<classNumber<<")"
177  << " available. Check if all class names are spelled correctly and if events are"
178  << " passing the selection cuts." << Endl;
179  }
180  catch (...) {
181  Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" << Endl;
182  }
183  return 0;
184 }
185 
186 ////////////////////////////////////////////////////////////////////////////////
187 /// destroys the event collection (events + vector)
188 
190 {
191  UInt_t i = TreeIndex(type);
192  if (i>=fEventCollection.size() || fEventCollection[i].size()==0) return;
193  if (deleteEvents) {
194 
195  for (UInt_t j=0; j<fEventCollection[i].size(); j++) delete fEventCollection[i][j];
196  }
197  fEventCollection[i].clear();
198 }
199 
200 ////////////////////////////////////////////////////////////////////////////////
201 
203 {
206  return ((fEventCollection.at(fCurrentTreeIdx))).at(iEvt);
207  }
208  else {
210  }
211 }
212 
213 ////////////////////////////////////////////////////////////////////////////////
214 /// access the number of variables through the datasetinfo
215 
217 {
218  return fdsi->GetNVariables();
219 }
220 
221 ////////////////////////////////////////////////////////////////////////////////
222 /// access the number of targets through the datasetinfo
223 
225 {
226  return fdsi->GetNTargets();
227 }
228 
229 ////////////////////////////////////////////////////////////////////////////////
230 /// access the number of targets through the datasetinfo
231 
233 {
234  return fdsi->GetNSpectators();
235 }
236 
237 ////////////////////////////////////////////////////////////////////////////////
238 /// add event to event list
239 /// after which the event is owned by the dataset
240 
242 {
243  fEventCollection.at(Int_t(type)).push_back(ev);
245 }
246 
247 ////////////////////////////////////////////////////////////////////////////////
248 /// Sets the event collection (by DataSetFactory)
249 
250 void TMVA::DataSet::SetEventCollection(std::vector<TMVA::Event*>* events, Types::ETreeType type, Bool_t deleteEvents)
251 {
252  DestroyCollection(type,deleteEvents);
253 
254  const Int_t t = TreeIndex(type);
255  ClearNClassEvents( type );
256  //pointer to std::vector is not serializable,
257  fEventCollection.at(t) = *events;
258  for (std::vector<Event*>::iterator it = fEventCollection.at(t).begin(); it < fEventCollection.at(t).end(); it++) {
259  IncrementNClassEvents( t, (*it)->GetClass() );
260  }
261 }
262 
263 ////////////////////////////////////////////////////////////////////////////////
264 
267  Types::EAnalysisType analysistype )
268 {
269  UInt_t t = TreeIndex(type);
270  if (t<fResults.size()) {
271  const std::map< TString, Results* >& resultsForType = fResults[t];
272  std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
273  if (it!=resultsForType.end()) {
274  //Log() << kINFO << " GetResults("<<info<<") returns existing result." << Endl;
275  return it->second;
276  }
277  }
278  else {
279  fResults.resize(t+1);
280  }
281 
282  // nothing found
283 
284  Results * newresults = 0;
285  switch(analysistype) {
287  newresults = new ResultsClassification(fdsi,resultsName);
288  break;
289  case Types::kRegression:
290  newresults = new ResultsRegression(fdsi,resultsName);
291  break;
292  case Types::kMulticlass:
293  newresults = new ResultsMulticlass(fdsi,resultsName);
294  break;
296  newresults = new ResultsClassification(fdsi,resultsName);
297  break;
299  //Log() << kINFO << " GetResults("<<info<<") can't create new one." << Endl;
300  return 0;
301  break;
302  }
303 
304  newresults->SetTreeType( type );
305  fResults[t][resultsName] = newresults;
306 
307  //Log() << kINFO << " GetResults("<<info<<") builds new result." << Endl;
308  return newresults;
309 }
310 ////////////////////////////////////////////////////////////////////////////////
311 /// delete the results stored for this particular Method instance.
312 /// (here apparently called resultsName instead of MethodTitle
313 /// Tree type (Training, testing etc..)
314 /// Analysis Type (Classification, Multiclass, Regression etc..)
315 
316 void TMVA::DataSet::DeleteResults( const TString & resultsName,
318  Types::EAnalysisType /* analysistype */ )
319 {
320  if (fResults.empty()) return;
321 
322  if (UInt_t(type) > fResults.size()){
323  Log()<<kFATAL<< Form("Dataset[%s] : ",fdsi->GetName()) << "you asked for an Treetype (training/testing/...)"
324  << " whose index " << type << " does not exist " << Endl;
325  }
326  std::map< TString, Results* >& resultsForType = fResults[UInt_t(type)];
327  std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
328  if (it!=resultsForType.end()) {
329  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << " Delete Results previous existing result:" << resultsName
330  << " of type " << type << Endl;
331  delete it->second;
332  resultsForType.erase(it->first);
333  }
334  else {
335  Log() << kINFO << Form("Dataset[%s] : ",fdsi->GetName()) << "could not fine Result class of " << resultsName
336  << " of type " << type << " which I should have deleted" << Endl;
337  }
338 }
339 ////////////////////////////////////////////////////////////////////////////////
340 /// divide training set
341 
343 {
345  // not changing anything ??
346  if (fBlockBelongToTraining.size() == blockNum) return;
347  // storing the original training vector
348  if (fBlockBelongToTraining.size() == 1) {
349  if (fEventCollection[tOrg].size() == 0)
350  fEventCollection[tOrg].resize(fEventCollection[tTrn].size());
351  fEventCollection[tOrg].clear();
352  for (UInt_t i=0; i<fEventCollection[tTrn].size(); i++)
353  fEventCollection[tOrg].push_back(fEventCollection[tTrn][i]);
354  fClassEvents[tOrg] = fClassEvents[tTrn];
355  }
356  //reseting the event division vector
357  fBlockBelongToTraining.clear();
358  for (UInt_t i=0 ; i < blockNum ; i++) fBlockBelongToTraining.push_back(kTRUE);
359 
361 }
362 
363 ////////////////////////////////////////////////////////////////////////////////
364 /// apply division of data set
365 
367 {
369  fEventCollection[tTrn].clear();
370  if (fEventCollection[tVld].size()==0)
371  fEventCollection[tVld].resize(fEventCollection[tOrg].size());
372  fEventCollection[tVld].clear();
373 
374  //creating the new events collections, notice that the events that can't be evenly divided belong to the last event
375  for (UInt_t i=0; i<fEventCollection[tOrg].size(); i++) {
377  fEventCollection[tTrn].push_back(fEventCollection[tOrg][i]);
378  else
379  fEventCollection[tVld].push_back(fEventCollection[tOrg][i]);
380  }
381 }
382 
383 ////////////////////////////////////////////////////////////////////////////////
384 /// move training block
385 
387 {
388  if (dest == Types::kValidation)
389  fBlockBelongToTraining[blockInd]=kFALSE;
390  else
391  fBlockBelongToTraining[blockInd]=kTRUE;
392  if (applyChanges) ApplyTrainingSetDivision();
393 }
394 
395 ////////////////////////////////////////////////////////////////////////////////
396 /// return number of signal test events in dataset
397 
399 {
401 }
402 
403 ////////////////////////////////////////////////////////////////////////////////
404 /// return number of background test events in dataset
405 
407 {
408  return GetNClassEvents(Types::kTesting, fdsi->GetClassInfo("Background")->GetNumber() );
409 }
410 
411 ////////////////////////////////////////////////////////////////////////////////
412 /// return number of signal training events in dataset
413 
415 {
417 }
418 
419 ////////////////////////////////////////////////////////////////////////////////
420 /// return number of background training events in dataset
421 
423 {
424  return GetNClassEvents(Types::kTraining, fdsi->GetClassInfo("Background")->GetNumber() );
425 }
426 
427 ////////////////////////////////////////////////////////////////////////////////
428 /// initialize random or importance sampling
429 
430 void TMVA::DataSet::InitSampling( Float_t fraction, Float_t weight, UInt_t seed )
431 {
432  // add a random generator if not yet present
433  if (fSamplingRandom == 0 ) fSamplingRandom = new TRandom3( seed );
434 
435  // first, clear the lists
436  std::vector< std::pair< Float_t, Long64_t >* > evtList;
437  std::vector< std::pair< Float_t, Long64_t >* >::iterator it;
438 
439  Int_t treeIdx = TreeIndex( GetCurrentType() );
440 
441  if (fSamplingEventList.size() < UInt_t(treeIdx+1) ) fSamplingEventList.resize(treeIdx+1);
442  if (fSamplingSelected.size() < UInt_t(treeIdx+1) ) fSamplingSelected.resize(treeIdx+1);
443 // for (it = fSamplingEventList.at(treeIdx).begin(); it != fSamplingEventList.at(treeIdx).end(); it++ ) delete (*it);
444  fSamplingEventList.at(treeIdx).clear();
445  fSamplingSelected.at(treeIdx).clear();
446 
447  if (fSampling.size() < UInt_t(treeIdx+1) ) fSampling.resize(treeIdx+1);
448  if (fSamplingNEvents.size() < UInt_t(treeIdx+1) ) fSamplingNEvents.resize(treeIdx+1);
449  if (fSamplingWeight.size() < UInt_t(treeIdx+1) ) fSamplingWeight.resize(treeIdx+1);
450 
451  if (fraction > 0.999999 || fraction < 0.0000001) {
452  fSampling.at( treeIdx ) = false;
453  fSamplingNEvents.at( treeIdx ) = 0;
454  fSamplingWeight.at( treeIdx ) = 1.0;
455  return;
456  }
457 
458  // for the initialization, the sampling has to be turned off, afterwards we will turn it on
459  fSampling.at( treeIdx ) = false;
460 
461  fSamplingNEvents.at( treeIdx ) = Int_t(fraction*GetNEvents());
462  fSamplingWeight.at( treeIdx ) = weight;
463 
464  Long64_t nEvts = GetNEvents();
465  fSamplingEventList.at( treeIdx ).reserve( nEvts );
466  fSamplingSelected.at( treeIdx ).reserve( fSamplingNEvents.at(treeIdx) );
467  for (Long64_t ievt=0; ievt<nEvts; ievt++) {
468  std::pair<Float_t,Long64_t> p(1.0,ievt);
469  fSamplingEventList.at( treeIdx ).push_back( p );
470  }
471 
472  // now turn sampling on
473  fSampling.at( treeIdx ) = true;
474 }
475 
476 
477 ////////////////////////////////////////////////////////////////////////////////
478 /// create an event sampling (random or importance sampling)
479 
481 {
482  Int_t treeIdx = TreeIndex( GetCurrentType() );
483 
484  if (!fSampling.at(treeIdx) ) return;
485 
486  if (fSamplingRandom == 0 )
487  Log() << kFATAL<< Form("Dataset[%s] : ",fdsi->GetName())
488  << "no random generator present for creating a random/importance sampling (initialized?)" << Endl;
489 
490  // delete the previous selection
491  fSamplingSelected.at(treeIdx).clear();
492 
493  // create a temporary event-list
494  std::vector< std::pair< Float_t, Long64_t > > evtList;
495  std::vector< std::pair< Float_t, Long64_t > >::iterator evtListIt;
496 
497  // some variables
498  Float_t sumWeights = 0;
499 
500  // make a copy of the event-list
501  evtList.assign( fSamplingEventList.at(treeIdx).begin(), fSamplingEventList.at(treeIdx).end() );
502 
503  // sum up all the weights (internal weights for importance sampling)
504  for (evtListIt = evtList.begin(); evtListIt != evtList.end(); evtListIt++) {
505  sumWeights += (*evtListIt).first;
506  }
507  evtListIt = evtList.begin();
508 
509  // random numbers
510  std::vector< Float_t > rnds;
511  rnds.reserve(fSamplingNEvents.at(treeIdx));
512 
513  Float_t pos = 0;
514  for (Int_t i = 0; i < fSamplingNEvents.at(treeIdx); i++) {
515  pos = fSamplingRandom->Rndm()*sumWeights;
516  rnds.push_back( pos );
517  }
518 
519  // sort the random numbers
520  std::sort(rnds.begin(),rnds.end());
521 
522  // select the events according to the random numbers
523  std::vector< Float_t >::iterator rndsIt = rnds.begin();
524  Float_t runningSum = 0.000000001;
525  for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
526  runningSum += (*evtListIt).first;
527  if (runningSum >= (*rndsIt)) {
528  fSamplingSelected.at(treeIdx).push_back( (*evtListIt) );
529  evtListIt = evtList.erase( evtListIt );
530 
531  rndsIt++;
532  if (rndsIt == rnds.end() ) break;
533  }
534  else {
535  evtListIt++;
536  }
537  }
538 }
539 
540 ////////////////////////////////////////////////////////////////////////////////
541 /// increase the importance sampling weight of the event
542 /// when not successful and decrease it when successful
543 
544 void TMVA::DataSet::EventResult( Bool_t successful, Long64_t evtNumber )
545 {
546 
547  if (!fSampling.at(fCurrentTreeIdx)) return;
548  if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999) return;
549 
550  Long64_t start = 0;
551  Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
552  if (evtNumber >= 0) {
553  start = evtNumber;
554  stop = evtNumber;
555  }
556  for ( Long64_t iEvt = start; iEvt <= stop; iEvt++ ){
557  if (Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) < iEvt) {
558  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "event number (" << iEvt
559  << ") larger than number of sampled events ("
560  << fSamplingEventList.at(fCurrentTreeIdx).size() << " of tree " << fCurrentTreeIdx << ")" << Endl;
561  return;
562  }
563  Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first;
564  if (!successful) {
565  // weight /= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
566  weight /= fSamplingWeight.at(fCurrentTreeIdx);
567  if (weight > 1.0 ) weight = 1.0;
568  }
569  else {
570  // weight *= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
571  weight *= fSamplingWeight.at(fCurrentTreeIdx);
572  }
573  fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first = weight;
574  }
575 }
576 
577 ////////////////////////////////////////////////////////////////////////////////
578 /// create the test/trainings tree with all the variables, the weights, the
579 /// classes, the targets, the spectators, the MVA outputs
580 
582 {
583  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "GetTree(" << ( type==Types::kTraining ? "training" : "testing" ) << ")" << Endl;
584 
585  // the dataset does not hold the tree, this function returns a new tree every time it is called
586 
587  if (type!=Types::kTraining && type!=Types::kTesting) return 0;
588 
589  Types::ETreeType savedType = GetCurrentType();
590 
591  SetCurrentType(type);
592  const UInt_t t = TreeIndex(type);
593  if (fResults.size() <= t) {
594  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "No results for treetype " << ( type==Types::kTraining ? "training" : "testing" )
595  << " found. Size=" << fResults.size() << Endl;
596  }
597 
598  // return number of background training events in dataset
599  TString treeName( (type == Types::kTraining ? "TrainTree" : "TestTree" ) );
600  TTree *tree = new TTree(treeName,treeName);
601 
602  Float_t *varVals = new Float_t[fdsi->GetNVariables()];
603  Float_t *tgtVals = new Float_t[fdsi->GetNTargets()];
604  Float_t *visVals = new Float_t[fdsi->GetNSpectators()];
605 
606  UInt_t cls;
607  Float_t weight;
608  // TObjString *className = new TObjString();
609  char className[40];
610 
611 
612  //Float_t metVals[fResults.at(t).size()][Int_t(fdsi->GetNTargets()+1)];
613  // replace by: [Joerg]
614  Float_t **metVals = new Float_t*[fResults.at(t).size()];
615  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
616  metVals[i] = new Float_t[fdsi->GetNTargets()+fdsi->GetNClasses()];
617 
618  // create branches for event-variables
619  tree->Branch( "classID", &cls, "classID/I" );
620  tree->Branch( "className", className, "className/C" );
621 
622  // create all branches for the variables
623  Int_t n = 0;
624  for (std::vector<VariableInfo>::const_iterator itVars = fdsi->GetVariableInfos().begin();
625  itVars != fdsi->GetVariableInfos().end(); itVars++) {
626 
627  // has to be changed to take care of types different than float: TODO
628  tree->Branch( (*itVars).GetInternalName(), &varVals[n], (*itVars).GetInternalName()+TString("/F") );
629  n++;
630  }
631  // create the branches for the targets
632  n = 0;
633  for (std::vector<VariableInfo>::const_iterator itTgts = fdsi->GetTargetInfos().begin();
634  itTgts != fdsi->GetTargetInfos().end(); itTgts++) {
635  // has to be changed to take care of types different than float: TODO
636  tree->Branch( (*itTgts).GetInternalName(), &tgtVals[n], (*itTgts).GetInternalName()+TString("/F") );
637  n++;
638  }
639  // create the branches for the spectator variables
640  n = 0;
641  for (std::vector<VariableInfo>::const_iterator itVis = fdsi->GetSpectatorInfos().begin();
642  itVis != fdsi->GetSpectatorInfos().end(); itVis++) {
643  // has to be changed to take care of types different than float: TODO
644  tree->Branch( (*itVis).GetInternalName(), &visVals[n], (*itVis).GetInternalName()+TString("/F") );
645  n++;
646  }
647 
648  tree->Branch( "weight", &weight, "weight/F" );
649 
650  // create all the branches for the results
651  n = 0;
652  for (std::map< TString, Results* >::iterator itMethod = fResults.at(t).begin();
653  itMethod != fResults.at(t).end(); itMethod++) {
654 
655 
656  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "analysis type: " << (itMethod->second->GetAnalysisType()==Types::kRegression ? "Regression" :
657  (itMethod->second->GetAnalysisType()==Types::kMulticlass ? "Multiclass" : "Classification" )) << Endl;
658 
659  if (itMethod->second->GetAnalysisType() == Types::kClassification) {
660  // classification
661  tree->Branch( itMethod->first, &(metVals[n][0]), itMethod->first + "/F" );
662  }
663  else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
664  // multiclass classification
665  TString leafList("");
666  for (UInt_t iCls = 0; iCls < fdsi->GetNClasses(); iCls++) {
667  if (iCls > 0) leafList.Append( ":" );
668  leafList.Append( fdsi->GetClassInfo( iCls )->GetName() );
669  leafList.Append( "/F" );
670  }
671  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
672  << leafList << " itMethod->second " << itMethod->second << Endl;
673  tree->Branch( itMethod->first, (metVals[n]), leafList );
674  }
675  else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
676  // regression
677  TString leafList("");
678  for (UInt_t iTgt = 0; iTgt < fdsi->GetNTargets(); iTgt++) {
679  if (iTgt > 0) leafList.Append( ":" );
680  leafList.Append( fdsi->GetTargetInfo( iTgt ).GetInternalName() );
681  // leafList.Append( fdsi->GetTargetInfo( iTgt ).GetLabel() );
682  leafList.Append( "/F" );
683  }
684  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
685  << leafList << " itMethod->second " << itMethod->second << Endl;
686  tree->Branch( itMethod->first, (metVals[n]), leafList );
687  }
688  else {
689  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "Unknown analysis type for result found when writing TestTree." << Endl;
690  }
691  n++;
692 
693  }
694 
695  // loop through all the events
696  for (Long64_t iEvt = 0; iEvt < GetNEvents( type ); iEvt++) {
697  // write the event-variables
698  const Event* ev = GetEvent( iEvt );
699  // write the classnumber and the classname
700  cls = ev->GetClass();
701  weight = ev->GetWeight();
702  strlcpy(className, fdsi->GetClassInfo( cls )->GetName(), sizeof(className));
703 
704  // write the variables, targets and spectator variables
705  for (UInt_t ivar = 0; ivar < ev->GetNVariables(); ivar++) varVals[ivar] = ev->GetValue( ivar );
706  for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) tgtVals[itgt] = ev->GetTarget( itgt );
707  for (UInt_t ivis = 0; ivis < ev->GetNSpectators(); ivis++) visVals[ivis] = ev->GetSpectator( ivis );
708 
709 
710  // loop through all the results and write the branches
711  n=0;
712  for (std::map<TString, Results*>::iterator itMethod = fResults.at(t).begin();
713  itMethod != fResults.at(t).end(); itMethod++) {
714  Results* results = itMethod->second;
715 
716  const std::vector< Float_t >& vals = results->operator[](iEvt);
717 
718  if (itMethod->second->GetAnalysisType() == Types::kClassification) {
719  // classification
720  metVals[n][0] = vals[0];
721  }
722  else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
723  // multiclass classification
724  for (UInt_t nCls = 0, nClsEnd=fdsi->GetNClasses(); nCls < nClsEnd; nCls++) {
725  Float_t val = vals.at(nCls);
726  metVals[n][nCls] = val;
727  }
728  }
729  else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
730  // regression
731  for (UInt_t nTgts = 0; nTgts < fdsi->GetNTargets(); nTgts++) {
732  Float_t val = vals.at(nTgts);
733  metVals[n][nTgts] = val;
734  }
735  }
736  n++;
737  }
738  // fill the variables into the tree
739  tree->Fill();
740  }
741 
742  Log() << kHEADER //<< Form("[%s] : ",fdsi.GetName())
743  << "Created tree '" << tree->GetName() << "' with " << tree->GetEntries() << " events" << Endl << Endl;
744 
745  SetCurrentType(savedType);
746 
747  delete[] varVals;
748  delete[] tgtVals;
749  delete[] visVals;
750 
751  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
752  delete[] metVals[i];
753  delete[] metVals;
754 
755  return tree;
756 }
757 
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:145
UInt_t GetNVariables() const
Definition: DataSetInfo.h:110
Random number generator class based on M.
Definition: TRandom3.h:27
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
long long Long64_t
Definition: RtypesCore.h:69
Class that is the base-class for a vector of result.
Long64_t fTrainingBlockSize
Definition: DataSet.h:178
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingSelected
Definition: DataSet.h:162
const TString & GetInternalName() const
Definition: VariableInfo.h:58
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Definition: DataSet.cxx:241
std::vector< VariableInfo > & GetSpectatorInfos()
Definition: DataSetInfo.h:104
virtual Double_t Rndm()
Machine independent random number generator.
Definition: TRandom3.cxx:94
float Float_t
Definition: RtypesCore.h:53
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingEventList
Definition: DataSet.h:161
DataSet()
constructor
Definition: DataSet.cxx:91
std::vector< std::vector< Event * > > fEventCollection
Definition: DataSet.h:150
void CreateSampling() const
create an event sampling (random or importance sampling)
Definition: DataSet.cxx:480
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4383
TRandom3 * fSamplingRandom
Definition: DataSet.h:163
EAnalysisType
Definition: Types.h:125
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:216
void SetTreeType(Types::ETreeType type)
Definition: Results.h:67
Basic string class.
Definition: TString.h:129
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< Char_t > fBlockBelongToTraining
Definition: DataSet.h:174
void ClearNClassEvents(Int_t type)
Definition: DataSet.cxx:160
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
#define NULL
Definition: RtypesCore.h:88
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:422
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t TreeIndex(Types::ETreeType type) const
Definition: DataSet.h:190
UInt_t GetNSpectators() const
access the number of spectators through the datasetinfo
Definition: DataSet.cxx:232
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:29
virtual ~DataSet()
destructor
Definition: DataSet.cxx:123
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:581
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:203
UInt_t GetClass() const
Definition: Event.h:81
TString & Append(const char *cs)
Definition: TString.h:497
std::vector< std::vector< double > > Data
Class that contains all the data information.
Definition: DataSetInfo.h:60
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
std::vector< VariableInfo > & GetTargetInfos()
Definition: DataSetInfo.h:99
Bool_t fHasNegativeEventWeights
Definition: DataSet.h:170
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:320
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
Definition: DataSet.cxx:386
void ApplyTrainingSetDivision()
apply division of data set
Definition: DataSet.cxx:366
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:97
UInt_t GetNTargets() const
Definition: DataSetInfo.h:111
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
std::vector< std::vector< Long64_t > > fClassEvents
Definition: DataSet.h:167
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:398
ClassInfo * GetClassInfo(Int_t clNum) const
VariableInfo & GetTargetInfo(Int_t i)
Definition: DataSetInfo.h:101
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
Definition: DataSet.cxx:316
UInt_t GetNSpectators() const
accessor to the number of spectators
Definition: Event.cxx:328
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void DivideTrainingSet(UInt_t blockNum)
divide training set
Definition: DataSet.cxx:342
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
Definition: DataSet.cxx:189
UInt_t GetNSpectators(bool all=kTRUE) const
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:406
const DataSetInfo * fdsi
Definition: DataSet.h:148
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:309
const Bool_t kFALSE
Definition: RtypesCore.h:92
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:151
std::vector< Char_t > fSampling
Definition: DataSet.h:158
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
Definition: DataSet.cxx:544
std::vector< Float_t > fSamplingWeight
Definition: DataSet.h:160
Long64_t fCurrentEventIdx
Definition: DataSet.h:155
UInt_t fCurrentTreeIdx
[train/test/...][method-identifier]
Definition: DataSet.h:154
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:414
MsgLogger & Log() const
message logger
Definition: DataSet.h:173
int type
Definition: TGX11.cxx:120
Class which takes the results of a multiclass classification.
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
void SetEventCollection(std::vector< Event *> *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
Definition: DataSet.cxx:250
std::vector< Int_t > fSamplingNEvents
Definition: DataSet.h:159
UInt_t GetNumber() const
Definition: ClassInfo.h:65
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:67
virtual Long64_t GetEntries() const
Definition: TTree.h:381
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:168
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1660
MsgLogger * fLogger
Definition: DataSet.h:172
#define dest(otri, vertexptr)
Definition: triangle.c:1040
Class that is the base-class for a vector of result.
Definition: Results.h:57
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:215
Definition: tree.py:1
A TTree object has a header with a name and a title.
Definition: TTree.h:78
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:224
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Definition: Event.cxx:262
const Bool_t kTRUE
Definition: RtypesCore.h:91
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
Definition: DataSet.cxx:430
const Int_t n
Definition: legend1.C:16
const Event * GetEvent() const
Definition: DataSet.cxx:202
std::vector< VariableInfo > & GetVariableInfos()
Definition: DataSetInfo.h:94
Class that is the base-class for a vector of result.
std::vector< std::map< TString, Results *> > fResults
Definition: DataSet.h:152