Logo ROOT   6.12/07
Reference Guide
DataSet.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSet *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17  * *
18  * Copyright (c) 2006: *
19  * CERN, Switzerland *
20  * MPI-K Heidelberg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 /*! \class TMVA::DataSet
28 \ingroup TMVA
29 
30 Class that contains all the data information
31 
32 */
33 
34 #include <vector>
35 #include <algorithm>
36 #include <cstdlib>
37 #include <stdexcept>
38 #include <algorithm>
39 
40 #include "TMVA/DataSetInfo.h"
41 #include "TMVA/DataSet.h"
42 #include "TMVA/Event.h"
43 #include "TMVA/MsgLogger.h"
44 #include "TMVA/ResultsRegression.h"
46 #include "TMVA/ResultsMulticlass.h"
47 #include "TMVA/Configurable.h"
48 
49 #include "TMVA/Types.h"
50 #include "TMVA/Results.h"
51 #include "TMVA/VariableInfo.h"
52 
53 #include "TRandom3.h"
54 
55 ////////////////////////////////////////////////////////////////////////////////
56 /// constructor
57 
59  :TNamed(dsi.GetName(),"DataSet"),
60  fdsi(&dsi),
61  fEventCollection(4),
62  fCurrentTreeIdx(0),
63  fCurrentEventIdx(0),
64  fHasNegativeEventWeights(kFALSE),
65  fLogger( new MsgLogger(TString(TString("Dataset:")+dsi.GetName()).Data()) ),
66  fTrainingBlockSize(0)
67 {
68 
69  fClassEvents.resize(4);
70  fBlockBelongToTraining.reserve(10);
71  fBlockBelongToTraining.push_back(kTRUE);
72 
73  // sampling
74  fSamplingRandom = 0;
75 
76  Int_t treeNum = 2;
77  fSampling.resize( treeNum );
78  fSamplingNEvents.resize( treeNum );
79  fSamplingWeight.resize(treeNum);
80 
81  for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
82  fSampling.at(treeIdx) = kFALSE;
83  fSamplingNEvents.at(treeIdx) = 0;
84  fSamplingWeight.at(treeIdx) = 1.0;
85  }
86 }
87 
88 ////////////////////////////////////////////////////////////////////////////////
89 /// constructor
90 
92  :fdsi(new DataSetInfo(GetName())),
94  fCurrentTreeIdx(0),
97  fLogger( new MsgLogger(TString(TString("Dataset:")+GetName()).Data()) ),
99 {
100 
101  fClassEvents.resize(4);
102  fBlockBelongToTraining.reserve(10);
103  fBlockBelongToTraining.push_back(kTRUE);
104 
105  // sampling
106  fSamplingRandom = 0;
107 
108  Int_t treeNum = 2;
109  fSampling.resize( treeNum );
110  fSamplingNEvents.resize( treeNum );
111  fSamplingWeight.resize(treeNum);
112 
113  for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
114  fSampling.at(treeIdx) = kFALSE;
115  fSamplingNEvents.at(treeIdx) = 0;
116  fSamplingWeight.at(treeIdx) = 1.0;
117  }
118 }
119 
120 ////////////////////////////////////////////////////////////////////////////////
121 /// destructor
122 
124 {
125  // delete event collection
126  Bool_t deleteEvents=true; // dataset owns the events /JS
127  DestroyCollection( Types::kTraining, deleteEvents );
128  DestroyCollection( Types::kTesting, deleteEvents );
129 
130  fBlockBelongToTraining.clear();
131  // delete results
132  for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); it++) {
133  for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); itMap++) {
134  delete itMap->second;
135  }
136  }
137 
138  // delete sampling
139  if (fSamplingRandom != 0 ) delete fSamplingRandom;
140 
141 
142  // need also to delete fEventCollections[2] and [3], not sure if they are used
143  DestroyCollection( Types::kValidation, deleteEvents );
145 
146  delete fLogger;
147 }
148 
149 ////////////////////////////////////////////////////////////////////////////////
150 
152 {
153  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
154  if (fClassEvents.at( type ).size() < classNumber+1) fClassEvents.at( type ).resize( classNumber+1 );
155  fClassEvents.at( type ).at( classNumber ) += 1;
156 }
157 
158 ////////////////////////////////////////////////////////////////////////////////
159 
161 {
162  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
163  fClassEvents.at( type ).clear();
164 }
165 
166 ////////////////////////////////////////////////////////////////////////////////
167 
169 {
170  try {
171  return fClassEvents.at(type).at(classNumber);
172  }
173  catch (std::out_of_range excpt) {
174  ClassInfo* ci = fdsi->GetClassInfo( classNumber );
175  Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "No " << (type==0?"training":(type==1?"testing":"_unknown_type_"))
176  << " events for class " << (ci==NULL?"_no_name_known_":ci->GetName()) << " (index # "<<classNumber<<")"
177  << " available. Check if all class names are spelled correctly and if events are"
178  << " passing the selection cuts." << Endl;
179  }
180  catch (...) {
181  Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" << Endl;
182  }
183  return 0;
184 }
185 
186 ////////////////////////////////////////////////////////////////////////////////
187 /// destroys the event collection (events + vector)
188 
190 {
191  UInt_t i = TreeIndex(type);
192  if (i>=fEventCollection.size() || fEventCollection[i].size()==0) return;
193  if (deleteEvents) {
194 
195  for (UInt_t j=0; j<fEventCollection[i].size(); j++) delete fEventCollection[i][j];
196  }
197  fEventCollection[i].clear();
198 }
199 
200 ////////////////////////////////////////////////////////////////////////////////
201 
203 {
206  return ((fEventCollection.at(fCurrentTreeIdx))).at(iEvt);
207  }
208  else {
210  }
211 }
212 
213 ////////////////////////////////////////////////////////////////////////////////
214 /// access the number of variables through the datasetinfo
215 
217 {
218  return fdsi->GetNVariables();
219 }
220 
221 ////////////////////////////////////////////////////////////////////////////////
222 /// access the number of targets through the datasetinfo
223 
225 {
226  return fdsi->GetNTargets();
227 }
228 
229 ////////////////////////////////////////////////////////////////////////////////
230 /// access the number of spectators through the datasetinfo
231 
233 {
234  return fdsi->GetNSpectators();
235 }
236 
237 ////////////////////////////////////////////////////////////////////////////////
238 /// add event to event list
239 /// after which the event is owned by the dataset
240 
242 {
243  fEventCollection.at(Int_t(type)).push_back(ev);
245 }
246 
247 ////////////////////////////////////////////////////////////////////////////////
248 /// Sets the event collection (by DataSetFactory)
249 
250 void TMVA::DataSet::SetEventCollection(std::vector<TMVA::Event*>* events, Types::ETreeType type, Bool_t deleteEvents)
251 {
252  DestroyCollection(type,deleteEvents);
253 
254  const Int_t t = TreeIndex(type);
255  ClearNClassEvents( type );
256  //pointer to std::vector is not serializable,
257  fEventCollection.at(t) = *events;
258  for (std::vector<Event*>::iterator it = fEventCollection.at(t).begin(); it < fEventCollection.at(t).end(); it++) {
259  IncrementNClassEvents( t, (*it)->GetClass() );
260  }
261 }
262 
263 ////////////////////////////////////////////////////////////////////////////////
264 
267  Types::EAnalysisType analysistype )
268 {
269  UInt_t t = TreeIndex(type);
270  if (t<fResults.size()) {
271  const std::map< TString, Results* >& resultsForType = fResults[t];
272  std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
273  if (it!=resultsForType.end()) {
274  //Log() << kINFO << " GetResults("<<info<<") returns existing result." << Endl;
275  return it->second;
276  }
277  }
278  else {
279  fResults.resize(t+1);
280  }
281 
282  // nothing found
283 
284  Results * newresults = 0;
285  switch(analysistype) {
287  newresults = new ResultsClassification(fdsi,resultsName);
288  break;
289  case Types::kRegression:
290  newresults = new ResultsRegression(fdsi,resultsName);
291  break;
292  case Types::kMulticlass:
293  newresults = new ResultsMulticlass(fdsi,resultsName);
294  break;
296  newresults = new ResultsClassification(fdsi,resultsName);
297  break;
299  //Log() << kINFO << " GetResults("<<info<<") can't create new one." << Endl;
300  return 0;
301  break;
302  }
303 
304  newresults->SetTreeType( type );
305  fResults[t][resultsName] = newresults;
306 
307  //Log() << kINFO << " GetResults("<<info<<") builds new result." << Endl;
308  return newresults;
309 }
310 ////////////////////////////////////////////////////////////////////////////////
311 /// delete the results stored for this particular Method instance.
312 /// (here apparently called resultsName instead of MethodTitle)
313 /// Tree type (Training, testing etc..)
314 /// Analysis Type (Classification, Multiclass, Regression etc..)
315 
316 void TMVA::DataSet::DeleteResults( const TString & resultsName,
318  Types::EAnalysisType /* analysistype */ )
319 {
320  if (fResults.empty()) return;
321 
322  if (UInt_t(type) > fResults.size()){
323  Log()<<kFATAL<< Form("Dataset[%s] : ",fdsi->GetName()) << "you asked for an Treetype (training/testing/...)"
324  << " whose index " << type << " does not exist " << Endl;
325  }
326  std::map< TString, Results* >& resultsForType = fResults[UInt_t(type)];
327  std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
328  if (it!=resultsForType.end()) {
329  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << " Delete Results previous existing result:" << resultsName
330  << " of type " << type << Endl;
331  delete it->second;
332  resultsForType.erase(it->first);
333  }
334  else {
335  Log() << kINFO << Form("Dataset[%s] : ",fdsi->GetName()) << "could not fine Result class of " << resultsName
336  << " of type " << type << " which I should have deleted" << Endl;
337  }
338 }
339 ////////////////////////////////////////////////////////////////////////////////
340 /// divide training set
341 
343 {
345  // not changing anything ??
346  if (fBlockBelongToTraining.size() == blockNum) return;
347  // storing the original training vector
348  if (fBlockBelongToTraining.size() == 1) {
349  if (fEventCollection[tOrg].size() == 0)
350  fEventCollection[tOrg].resize(fEventCollection[tTrn].size());
351  fEventCollection[tOrg].clear();
352  for (UInt_t i=0; i<fEventCollection[tTrn].size(); i++)
353  fEventCollection[tOrg].push_back(fEventCollection[tTrn][i]);
354  fClassEvents[tOrg] = fClassEvents[tTrn];
355  }
356  //resetting the event division vector
357  fBlockBelongToTraining.clear();
358  for (UInt_t i=0 ; i < blockNum ; i++) fBlockBelongToTraining.push_back(kTRUE);
359 
361 }
362 
363 ////////////////////////////////////////////////////////////////////////////////
364 /// apply division of data set
365 
367 {
369  fEventCollection[tTrn].clear();
370  if (fEventCollection[tVld].size()==0)
371  fEventCollection[tVld].resize(fEventCollection[tOrg].size());
372  fEventCollection[tVld].clear();
373 
374  //creating the new events collections, notice that the events that can't be evenly divided belong to the last event
375  for (UInt_t i=0; i<fEventCollection[tOrg].size(); i++) {
377  fEventCollection[tTrn].push_back(fEventCollection[tOrg][i]);
378  else
379  fEventCollection[tVld].push_back(fEventCollection[tOrg][i]);
380  }
381 }
382 
383 ////////////////////////////////////////////////////////////////////////////////
384 /// move training block
385 
387 {
388  if (dest == Types::kValidation)
389  fBlockBelongToTraining[blockInd]=kFALSE;
390  else
391  fBlockBelongToTraining[blockInd]=kTRUE;
392  if (applyChanges) ApplyTrainingSetDivision();
393 }
394 
395 ////////////////////////////////////////////////////////////////////////////////
396 /// return number of signal test events in dataset
397 
399 {
401 }
402 
403 ////////////////////////////////////////////////////////////////////////////////
404 /// return number of background test events in dataset
405 
407 {
408  return GetNClassEvents(Types::kTesting, fdsi->GetClassInfo("Background")->GetNumber() );
409 }
410 
411 ////////////////////////////////////////////////////////////////////////////////
412 /// return number of signal training events in dataset
413 
415 {
417 }
418 
419 ////////////////////////////////////////////////////////////////////////////////
420 /// return number of background training events in dataset
421 
423 {
424  return GetNClassEvents(Types::kTraining, fdsi->GetClassInfo("Background")->GetNumber() );
425 }
426 
427 ////////////////////////////////////////////////////////////////////////////////
428 /// initialize random or importance sampling
429 
430 void TMVA::DataSet::InitSampling( Float_t fraction, Float_t weight, UInt_t seed )
431 {
432  // add a random generator if not yet present
433  if (fSamplingRandom == 0 ) fSamplingRandom = new TRandom3( seed );
434 
435  // first, clear the lists
436  std::vector< std::pair< Float_t, Long64_t >* > evtList;
437 
438  Int_t treeIdx = TreeIndex( GetCurrentType() );
439 
440  if (fSamplingEventList.size() < UInt_t(treeIdx+1) ) fSamplingEventList.resize(treeIdx+1);
441  if (fSamplingSelected.size() < UInt_t(treeIdx+1) ) fSamplingSelected.resize(treeIdx+1);
442 
443  fSamplingEventList.at(treeIdx).clear();
444  fSamplingSelected.at(treeIdx).clear();
445 
446  if (fSampling.size() < UInt_t(treeIdx+1) ) fSampling.resize(treeIdx+1);
447  if (fSamplingNEvents.size() < UInt_t(treeIdx+1) ) fSamplingNEvents.resize(treeIdx+1);
448  if (fSamplingWeight.size() < UInt_t(treeIdx+1) ) fSamplingWeight.resize(treeIdx+1);
449 
450  if (fraction > 0.999999 || fraction < 0.0000001) {
451  fSampling.at( treeIdx ) = false;
452  fSamplingNEvents.at( treeIdx ) = 0;
453  fSamplingWeight.at( treeIdx ) = 1.0;
454  return;
455  }
456 
457  // for the initialization, the sampling has to be turned off, afterwards we will turn it on
458  fSampling.at( treeIdx ) = false;
459 
460  fSamplingNEvents.at( treeIdx ) = Int_t(fraction*GetNEvents());
461  fSamplingWeight.at( treeIdx ) = weight;
462 
463  Long64_t nEvts = GetNEvents();
464  fSamplingEventList.at( treeIdx ).reserve( nEvts );
465  fSamplingSelected.at( treeIdx ).reserve( fSamplingNEvents.at(treeIdx) );
466  for (Long64_t ievt=0; ievt<nEvts; ievt++) {
467  std::pair<Float_t,Long64_t> p(1.0,ievt);
468  fSamplingEventList.at( treeIdx ).push_back( p );
469  }
470 
471  // now turn sampling on
472  fSampling.at( treeIdx ) = true;
473 }
474 
475 
476 ////////////////////////////////////////////////////////////////////////////////
477 /// create an event sampling (random or importance sampling)
478 
480 {
481  Int_t treeIdx = TreeIndex( GetCurrentType() );
482 
483  if (!fSampling.at(treeIdx) ) return;
484 
485  if (fSamplingRandom == 0 )
486  Log() << kFATAL<< Form("Dataset[%s] : ",fdsi->GetName())
487  << "no random generator present for creating a random/importance sampling (initialized?)" << Endl;
488 
489  // delete the previous selection
490  fSamplingSelected.at(treeIdx).clear();
491 
492  // create a temporary event-list
493  std::vector< std::pair< Float_t, Long64_t > > evtList;
494  std::vector< std::pair< Float_t, Long64_t > >::iterator evtListIt;
495 
496  // some variables
497  Float_t sumWeights = 0;
498 
499  // make a copy of the event-list
500  evtList.assign( fSamplingEventList.at(treeIdx).begin(), fSamplingEventList.at(treeIdx).end() );
501 
502  // sum up all the weights (internal weights for importance sampling)
503  for (evtListIt = evtList.begin(); evtListIt != evtList.end(); evtListIt++) {
504  sumWeights += (*evtListIt).first;
505  }
506  evtListIt = evtList.begin();
507 
508  // random numbers
509  std::vector< Float_t > rnds;
510  rnds.reserve(fSamplingNEvents.at(treeIdx));
511 
512  Float_t pos = 0;
513  for (Int_t i = 0; i < fSamplingNEvents.at(treeIdx); i++) {
514  pos = fSamplingRandom->Rndm()*sumWeights;
515  rnds.push_back( pos );
516  }
517 
518  // sort the random numbers
519  std::sort(rnds.begin(),rnds.end());
520 
521  // select the events according to the random numbers
522  std::vector< Float_t >::iterator rndsIt = rnds.begin();
523  Float_t runningSum = 0.000000001;
524  for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
525  runningSum += (*evtListIt).first;
526  if (runningSum >= (*rndsIt)) {
527  fSamplingSelected.at(treeIdx).push_back( (*evtListIt) );
528  evtListIt = evtList.erase( evtListIt );
529 
530  rndsIt++;
531  if (rndsIt == rnds.end() ) break;
532  }
533  else {
534  evtListIt++;
535  }
536  }
537 }
538 
539 ////////////////////////////////////////////////////////////////////////////////
540 /// increase the importance sampling weight of the event
541 /// when not successful and decrease it when successful
542 
543 void TMVA::DataSet::EventResult( Bool_t successful, Long64_t evtNumber )
544 {
545 
546  if (!fSampling.at(fCurrentTreeIdx)) return;
547  if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999) return;
548 
549  Long64_t start = 0;
550  Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
551  if (evtNumber >= 0) {
552  start = evtNumber;
553  stop = evtNumber;
554  }
555  for ( Long64_t iEvt = start; iEvt <= stop; iEvt++ ){
556  if (Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) < iEvt) {
557  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "event number (" << iEvt
558  << ") larger than number of sampled events ("
559  << fSamplingEventList.at(fCurrentTreeIdx).size() << " of tree " << fCurrentTreeIdx << ")" << Endl;
560  return;
561  }
562  Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first;
563  if (!successful) {
564  // weight /= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
565  weight /= fSamplingWeight.at(fCurrentTreeIdx);
566  if (weight > 1.0 ) weight = 1.0;
567  }
568  else {
569  // weight *= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
570  weight *= fSamplingWeight.at(fCurrentTreeIdx);
571  }
572  fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first = weight;
573  }
574 }
575 
576 ////////////////////////////////////////////////////////////////////////////////
577 /// create the test/trainings tree with all the variables, the weights, the
578 /// classes, the targets, the spectators, the MVA outputs
579 
581 {
582  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "GetTree(" << ( type==Types::kTraining ? "training" : "testing" ) << ")" << Endl;
583 
584  // the dataset does not hold the tree, this function returns a new tree every time it is called
585 
586  if (type!=Types::kTraining && type!=Types::kTesting) return 0;
587 
588  Types::ETreeType savedType = GetCurrentType();
589 
590  SetCurrentType(type);
591  const UInt_t t = TreeIndex(type);
592  if (fResults.size() <= t) {
593  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "No results for treetype " << ( type==Types::kTraining ? "training" : "testing" )
594  << " found. Size=" << fResults.size() << Endl;
595  }
596 
597  // create the output tree; it is named "TrainTree" or "TestTree" depending on the requested type
598  TString treeName( (type == Types::kTraining ? "TrainTree" : "TestTree" ) );
599  TTree *tree = new TTree(treeName,treeName);
600 
601  Float_t *varVals = new Float_t[fdsi->GetNVariables()];
602  Float_t *tgtVals = new Float_t[fdsi->GetNTargets()];
603  Float_t *visVals = new Float_t[fdsi->GetNSpectators()];
604 
605  UInt_t cls;
606  Float_t weight;
607  // TObjString *className = new TObjString();
608  char className[40];
609 
610 
611  //Float_t metVals[fResults.at(t).size()][Int_t(fdsi->GetNTargets()+1)];
612  // replace by: [Joerg]
613  Float_t **metVals = new Float_t*[fResults.at(t).size()];
614  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
615  metVals[i] = new Float_t[fdsi->GetNTargets()+fdsi->GetNClasses()];
616 
617  // create branches for event-variables
618  tree->Branch( "classID", &cls, "classID/I" );
619  tree->Branch( "className", className, "className/C" );
620 
621  // create all branches for the variables
622  Int_t n = 0;
623  for (std::vector<VariableInfo>::const_iterator itVars = fdsi->GetVariableInfos().begin();
624  itVars != fdsi->GetVariableInfos().end(); itVars++) {
625 
626  // has to be changed to take care of types different than float: TODO
627  tree->Branch( (*itVars).GetInternalName(), &varVals[n], (*itVars).GetInternalName()+TString("/F") );
628  n++;
629  }
630  // create the branches for the targets
631  n = 0;
632  for (std::vector<VariableInfo>::const_iterator itTgts = fdsi->GetTargetInfos().begin();
633  itTgts != fdsi->GetTargetInfos().end(); itTgts++) {
634  // has to be changed to take care of types different than float: TODO
635  tree->Branch( (*itTgts).GetInternalName(), &tgtVals[n], (*itTgts).GetInternalName()+TString("/F") );
636  n++;
637  }
638  // create the branches for the spectator variables
639  n = 0;
640  for (std::vector<VariableInfo>::const_iterator itVis = fdsi->GetSpectatorInfos().begin();
641  itVis != fdsi->GetSpectatorInfos().end(); itVis++) {
642  // has to be changed to take care of types different than float: TODO
643  tree->Branch( (*itVis).GetInternalName(), &visVals[n], (*itVis).GetInternalName()+TString("/F") );
644  n++;
645  }
646 
647  tree->Branch( "weight", &weight, "weight/F" );
648 
649  // create all the branches for the results
650  n = 0;
651  for (std::map< TString, Results* >::iterator itMethod = fResults.at(t).begin();
652  itMethod != fResults.at(t).end(); itMethod++) {
653 
654 
655  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "analysis type: " << (itMethod->second->GetAnalysisType()==Types::kRegression ? "Regression" :
656  (itMethod->second->GetAnalysisType()==Types::kMulticlass ? "Multiclass" : "Classification" )) << Endl;
657 
658  if (itMethod->second->GetAnalysisType() == Types::kClassification) {
659  // classification
660  tree->Branch( itMethod->first, &(metVals[n][0]), itMethod->first + "/F" );
661  }
662  else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
663  // multiclass classification
664  TString leafList("");
665  for (UInt_t iCls = 0; iCls < fdsi->GetNClasses(); iCls++) {
666  if (iCls > 0) leafList.Append( ":" );
667  leafList.Append( fdsi->GetClassInfo( iCls )->GetName() );
668  leafList.Append( "/F" );
669  }
670  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
671  << leafList << " itMethod->second " << itMethod->second << Endl;
672  tree->Branch( itMethod->first, (metVals[n]), leafList );
673  }
674  else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
675  // regression
676  TString leafList("");
677  for (UInt_t iTgt = 0; iTgt < fdsi->GetNTargets(); iTgt++) {
678  if (iTgt > 0) leafList.Append( ":" );
679  leafList.Append( fdsi->GetTargetInfo( iTgt ).GetInternalName() );
680  // leafList.Append( fdsi->GetTargetInfo( iTgt ).GetLabel() );
681  leafList.Append( "/F" );
682  }
683  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
684  << leafList << " itMethod->second " << itMethod->second << Endl;
685  tree->Branch( itMethod->first, (metVals[n]), leafList );
686  }
687  else {
688  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "Unknown analysis type for result found when writing TestTree." << Endl;
689  }
690  n++;
691 
692  }
693 
694  // loop through all the events
695  for (Long64_t iEvt = 0; iEvt < GetNEvents( type ); iEvt++) {
696  // write the event-variables
697  const Event* ev = GetEvent( iEvt );
698  // write the classnumber and the classname
699  cls = ev->GetClass();
700  weight = ev->GetWeight();
701  strlcpy(className, fdsi->GetClassInfo( cls )->GetName(), sizeof(className));
702 
703  // write the variables, targets and spectator variables
704  for (UInt_t ivar = 0; ivar < ev->GetNVariables(); ivar++) varVals[ivar] = ev->GetValue( ivar );
705  for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) tgtVals[itgt] = ev->GetTarget( itgt );
706  for (UInt_t ivis = 0; ivis < ev->GetNSpectators(); ivis++) visVals[ivis] = ev->GetSpectator( ivis );
707 
708 
709  // loop through all the results and write the branches
710  n=0;
711  for (std::map<TString, Results*>::iterator itMethod = fResults.at(t).begin();
712  itMethod != fResults.at(t).end(); itMethod++) {
713  Results* results = itMethod->second;
714 
715  const std::vector< Float_t >& vals = results->operator[](iEvt);
716 
717  if (itMethod->second->GetAnalysisType() == Types::kClassification) {
718  // classification
719  metVals[n][0] = vals[0];
720  }
721  else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
722  // multiclass classification
723  for (UInt_t nCls = 0, nClsEnd=fdsi->GetNClasses(); nCls < nClsEnd; nCls++) {
724  Float_t val = vals.at(nCls);
725  metVals[n][nCls] = val;
726  }
727  }
728  else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
729  // regression
730  for (UInt_t nTgts = 0; nTgts < fdsi->GetNTargets(); nTgts++) {
731  Float_t val = vals.at(nTgts);
732  metVals[n][nTgts] = val;
733  }
734  }
735  n++;
736  }
737  // fill the variables into the tree
738  tree->Fill();
739  }
740 
741  Log() << kHEADER //<< Form("[%s] : ",fdsi.GetName())
742  << "Created tree '" << tree->GetName() << "' with " << tree->GetEntries() << " events" << Endl << Endl;
743 
744  SetCurrentType(savedType);
745 
746  delete[] varVals;
747  delete[] tgtVals;
748  delete[] visVals;
749 
750  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
751  delete[] metVals[i];
752  delete[] metVals;
753 
754  return tree;
755 }
756 
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:145
UInt_t GetNVariables() const
Definition: DataSetInfo.h:110
Random number generator class based on M.
Definition: TRandom3.h:27
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
long long Long64_t
Definition: RtypesCore.h:69
Class that is the base-class for a vector of result.
Long64_t fTrainingBlockSize
Definition: DataSet.h:178
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingSelected
Definition: DataSet.h:162
const TString & GetInternalName() const
Definition: VariableInfo.h:58
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Definition: DataSet.cxx:241
std::vector< VariableInfo > & GetSpectatorInfos()
Definition: DataSetInfo.h:104
virtual Double_t Rndm()
Machine independent random number generator.
Definition: TRandom3.cxx:100
float Float_t
Definition: RtypesCore.h:53
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingEventList
Definition: DataSet.h:161
DataSet()
constructor
Definition: DataSet.cxx:91
std::vector< std::vector< Event * > > fEventCollection
Definition: DataSet.h:150
void CreateSampling() const
create an event sampling (random or importance sampling)
Definition: DataSet.cxx:479
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4364
TRandom3 * fSamplingRandom
Definition: DataSet.h:163
EAnalysisType
Definition: Types.h:125
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:216
void SetTreeType(Types::ETreeType type)
Definition: Results.h:67
Basic string class.
Definition: TString.h:125
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< Char_t > fBlockBelongToTraining
Definition: DataSet.h:174
void ClearNClassEvents(Int_t type)
Definition: DataSet.cxx:160
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:422
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t TreeIndex(Types::ETreeType type) const
Definition: DataSet.h:190
UInt_t GetNSpectators() const
access the number of spectators through the datasetinfo
Definition: DataSet.cxx:232
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:29
virtual ~DataSet()
destructor
Definition: DataSet.cxx:123
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:580
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:203
UInt_t GetClass() const
Definition: Event.h:81
TString & Append(const char *cs)
Definition: TString.h:495
Class that contains all the data information.
Definition: DataSetInfo.h:60
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
std::vector< VariableInfo > & GetTargetInfos()
Definition: DataSetInfo.h:99
Bool_t fHasNegativeEventWeights
Definition: DataSet.h:170
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:320
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
Definition: DataSet.cxx:386
void ApplyTrainingSetDivision()
apply division of data set
Definition: DataSet.cxx:366
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:97
UInt_t GetNTargets() const
Definition: DataSetInfo.h:111
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
std::vector< std::vector< Long64_t > > fClassEvents
Definition: DataSet.h:167
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:398
ClassInfo * GetClassInfo(Int_t clNum) const
VariableInfo & GetTargetInfo(Int_t i)
Definition: DataSetInfo.h:101
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
Definition: DataSet.cxx:316
UInt_t GetNSpectators() const
accessor to the number of spectators
Definition: Event.cxx:328
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void DivideTrainingSet(UInt_t blockNum)
divide training set
Definition: DataSet.cxx:342
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
Definition: DataSet.cxx:189
UInt_t GetNSpectators(bool all=kTRUE) const
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:406
const DataSetInfo * fdsi
Definition: DataSet.h:148
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:309
const Bool_t kFALSE
Definition: RtypesCore.h:88
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:237
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:151
std::vector< Char_t > fSampling
Definition: DataSet.h:158
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
Definition: DataSet.cxx:543
std::vector< Float_t > fSamplingWeight
Definition: DataSet.h:160
Long64_t fCurrentEventIdx
Definition: DataSet.h:155
UInt_t fCurrentTreeIdx
[train/test/...][method-identifier]
Definition: DataSet.h:154
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:414
MsgLogger & Log() const
message logger
Definition: DataSet.h:173
int type
Definition: TGX11.cxx:120
Class which takes the results of a multiclass classification.
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
void SetEventCollection(std::vector< Event *> *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
Definition: DataSet.cxx:250
std::vector< Int_t > fSamplingNEvents
Definition: DataSet.h:159
UInt_t GetNumber() const
Definition: ClassInfo.h:65
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:67
virtual Long64_t GetEntries() const
Definition: TTree.h:382
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:168
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1701
MsgLogger * fLogger
Definition: DataSet.h:172
#define dest(otri, vertexptr)
Definition: triangle.c:1040
Class that is the base-class for a vector of result.
Definition: Results.h:57
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:215
Definition: tree.py:1
A TTree object has a header with a name and a title.
Definition: TTree.h:70
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:224
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Definition: Event.cxx:262
const Bool_t kTRUE
Definition: RtypesCore.h:87
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
Definition: DataSet.cxx:430
const Int_t n
Definition: legend1.C:16
const Event * GetEvent() const
Definition: DataSet.cxx:202
std::vector< VariableInfo > & GetVariableInfos()
Definition: DataSetInfo.h:94
Class that is the base-class for a vector of result.
std::vector< std::map< TString, Results *> > fResults
Definition: DataSet.h:152