Logo ROOT   6.08/07
Reference Guide
DataSet.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSet *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17  * *
18  * Copyright (c) 2006: *
19  * CERN, Switzerland *
20  * MPI-K Heidelberg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #include <vector>
28 #include <algorithm>
29 #include <cstdlib>
30 #include <stdexcept>
31 #include <algorithm>
32 
33 #ifndef ROOT_TMVA_DataSetInfo
34 #include "TMVA/DataSetInfo.h"
35 #endif
36 #ifndef ROOT_TMVA_DataSet
37 #include "TMVA/DataSet.h"
38 #endif
39 #ifndef ROOT_TMVA_Event
40 #include "TMVA/Event.h"
41 #endif
42 #ifndef ROOT_TMVA_MsgLogger
43 #include "TMVA/MsgLogger.h"
44 #endif
45 #ifndef ROOT_TMVA_ResultsRegression
46 #include "TMVA/ResultsRegression.h"
47 #endif
48 #ifndef ROOT_TMVA_ResultsClassification
50 #endif
51 #ifndef ROOT_TMVA_ResultsMulticlass
52 #include "TMVA/ResultsMulticlass.h"
53 #endif
54 #ifndef ROOT_TMVA_Configurable
55 #include "TMVA/Configurable.h"
56 #endif
57 
58 #include "TMVA/Types.h"
59 #include "TMVA/Results.h"
60 #include "TMVA/VariableInfo.h"
61 
62 #include "TRandom3.h"
63 
64 
65 ////////////////////////////////////////////////////////////////////////////////
66 /// constructor
67 
69  :TNamed(dsi.GetName(),"DataSet"),
70  fdsi(&dsi),
71  fEventCollection(4),
72  fCurrentTreeIdx(0),
73  fCurrentEventIdx(0),
74  fHasNegativeEventWeights(kFALSE),
75  fLogger( new MsgLogger(TString(TString("Dataset:")+dsi.GetName()).Data()) ),
76  fTrainingBlockSize(0)
77 {
78 
79  fClassEvents.resize(4);
80  fBlockBelongToTraining.reserve(10);
81  fBlockBelongToTraining.push_back(kTRUE);
82 
83  // sampling
84  fSamplingRandom = 0;
85 
86  Int_t treeNum = 2;
87  fSampling.resize( treeNum );
88  fSamplingNEvents.resize( treeNum );
89  fSamplingWeight.resize(treeNum);
90 
91  for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
92  fSampling.at(treeIdx) = kFALSE;
93  fSamplingNEvents.at(treeIdx) = 0;
94  fSamplingWeight.at(treeIdx) = 1.0;
95  }
96 }
97 
99  :fdsi(new DataSetInfo(GetName())),
100  fEventCollection(4),
101  fCurrentTreeIdx(0),
102  fCurrentEventIdx(0),
104  fLogger( new MsgLogger(TString(TString("Dataset:")+GetName()).Data()) ),
106 {
107 
108  fClassEvents.resize(4);
109  fBlockBelongToTraining.reserve(10);
110  fBlockBelongToTraining.push_back(kTRUE);
111 
112  // sampling
113  fSamplingRandom = 0;
114 
115  Int_t treeNum = 2;
116  fSampling.resize( treeNum );
117  fSamplingNEvents.resize( treeNum );
118  fSamplingWeight.resize(treeNum);
119 
120  for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
121  fSampling.at(treeIdx) = kFALSE;
122  fSamplingNEvents.at(treeIdx) = 0;
123  fSamplingWeight.at(treeIdx) = 1.0;
124  }
125 }
126 
127 
128 
129 ////////////////////////////////////////////////////////////////////////////////
130 /// destructor
131 
133 {
134  // delete event collection
135  Bool_t deleteEvents=true; // dataset owns the events /JS
136  DestroyCollection( Types::kTraining, deleteEvents );
137  DestroyCollection( Types::kTesting, deleteEvents );
138 
139  fBlockBelongToTraining.clear();
140  // delete results
141  for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); it++) {
142  for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); itMap++) {
143  delete itMap->second;
144  }
145  }
146 
147  // delete sampling
148  if (fSamplingRandom != 0 ) delete fSamplingRandom;
149 
150 
151  // need also to delete fEventCollections[2] and [3], not sure if they are used
152  DestroyCollection( Types::kValidation, deleteEvents );
154 
155  delete fLogger;
156 }
157 
158 ////////////////////////////////////////////////////////////////////////////////
159 
161 {
162  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
163  if (fClassEvents.at( type ).size() < classNumber+1) fClassEvents.at( type ).resize( classNumber+1 );
164  fClassEvents.at( type ).at( classNumber ) += 1;
165 }
166 
167 ////////////////////////////////////////////////////////////////////////////////
168 
170 {
171  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
172  fClassEvents.at( type ).clear();
173 }
174 
175 ////////////////////////////////////////////////////////////////////////////////
176 
178 {
179  try {
180  return fClassEvents.at(type).at(classNumber);
181  }
182  catch (std::out_of_range excpt) {
183  ClassInfo* ci = fdsi->GetClassInfo( classNumber );
184  Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "No " << (type==0?"training":(type==1?"testing":"_unknown_type_"))
185  << " events for class " << (ci==NULL?"_no_name_known_":ci->GetName()) << " (index # "<<classNumber<<")"
186  << " available. Check if all class names are spelled correctly and if events are"
187  << " passing the selection cuts." << Endl;
188  }
189  catch (...) {
190  Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" << Endl;
191  }
192  return 0;
193 }
194 
195 ////////////////////////////////////////////////////////////////////////////////
196 /// destroys the event collection (events + vector)
197 
199 {
200  UInt_t i = TreeIndex(type);
201  if (i>=fEventCollection.size() || fEventCollection[i].size()==0) return;
202  if (deleteEvents) {
203 
204  for (UInt_t j=0; j<fEventCollection[i].size(); j++) delete fEventCollection[i][j];
205  }
206  fEventCollection[i].clear();
207 }
208 
209 ////////////////////////////////////////////////////////////////////////////////
210 
212 {
215  return ((fEventCollection.at(fCurrentTreeIdx))).at(iEvt);
216  }
217  else {
219  }
220 }
221 
222 ////////////////////////////////////////////////////////////////////////////////
223 /// access the number of variables through the datasetinfo
224 
226 {
227  return fdsi->GetNVariables();
228 }
229 
230 ////////////////////////////////////////////////////////////////////////////////
231 /// access the number of targets through the datasetinfo
232 
234 {
235  return fdsi->GetNTargets();
236 }
237 
238 ////////////////////////////////////////////////////////////////////////////////
239 /// access the number of targets through the datasetinfo
240 
242 {
243  return fdsi->GetNSpectators();
244 }
245 
246 ////////////////////////////////////////////////////////////////////////////////
247 /// add event to event list
248 /// after which the event is owned by the dataset
249 
251 {
252  fEventCollection.at(Int_t(type)).push_back(ev);
254 }
255 
256 ////////////////////////////////////////////////////////////////////////////////
257 /// Sets the event collection (by DataSetFactory)
258 
259 void TMVA::DataSet::SetEventCollection(std::vector<TMVA::Event*>* events, Types::ETreeType type, Bool_t deleteEvents)
260 {
261  DestroyCollection(type,deleteEvents);
262 
263  const Int_t t = TreeIndex(type);
264  ClearNClassEvents( type );
265  //pointer to std::vector is not serializable,
266  fEventCollection.at(t) = *events;
267  for (std::vector<Event*>::iterator it = fEventCollection.at(t).begin(); it < fEventCollection.at(t).end(); it++) {
268  IncrementNClassEvents( t, (*it)->GetClass() );
269  }
270 }
271 
272 ////////////////////////////////////////////////////////////////////////////////
273 /// TString info(resultsName+"/");
274 /// switch(type) {
275 /// case Types::kTraining: info += "kTraining/"; break;
276 /// case Types::kTesting: info += "kTesting/"; break;
277 /// default: break;
278 /// }
279 /// switch(analysistype) {
280 /// case Types::kClassification: info += "kClassification"; break;
281 /// case Types::kRegression: info += "kRegression"; break;
282 /// case Types::kNoAnalysisType: info += "kNoAnalysisType"; break;
283 /// case Types::kMaxAnalysisType:info += "kMaxAnalysisType"; break;
284 /// }
285 
288  Types::EAnalysisType analysistype )
289 {
290  UInt_t t = TreeIndex(type);
291  if (t<fResults.size()) {
292  const std::map< TString, Results* >& resultsForType = fResults[t];
293  std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
294  if (it!=resultsForType.end()) {
295  //Log() << kINFO << " GetResults("<<info<<") returns existing result." << Endl;
296  return it->second;
297  }
298  }
299  else {
300  fResults.resize(t+1);
301  }
302 
303  // nothing found
304 
305  Results * newresults = 0;
306  switch(analysistype) {
308  newresults = new ResultsClassification(fdsi,resultsName);
309  break;
310  case Types::kRegression:
311  newresults = new ResultsRegression(fdsi,resultsName);
312  break;
313  case Types::kMulticlass:
314  newresults = new ResultsMulticlass(fdsi,resultsName);
315  break;
317  newresults = new ResultsClassification(fdsi,resultsName);
318  break;
320  //Log() << kINFO << " GetResults("<<info<<") can't create new one." << Endl;
321  return 0;
322  break;
323  }
324 
325  newresults->SetTreeType( type );
326  fResults[t][resultsName] = newresults;
327 
328  //Log() << kINFO << " GetResults("<<info<<") builds new result." << Endl;
329  return newresults;
330 }
331 ////////////////////////////////////////////////////////////////////////////////
332 /// delete the results stored for this particulary
333 /// Method instance (here appareantly called resultsName instead of MethodTitle
334 /// Tree type (Training, testing etc..)
335 /// Analysis Type (Classification, Multiclass, Regression etc..)
336 
337 void TMVA::DataSet::DeleteResults( const TString & resultsName,
339  Types::EAnalysisType /* analysistype */ )
340 {
341  if (fResults.empty()) return;
342 
343  if (UInt_t(type) > fResults.size()){
344  Log()<<kFATAL<< Form("Dataset[%s] : ",fdsi->GetName()) << "you asked for an Treetype (training/testing/...)"
345  << " whose index " << type << " does not exist " << Endl;
346  }
347  std::map< TString, Results* >& resultsForType = fResults[UInt_t(type)];
348  std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
349  if (it!=resultsForType.end()) {
350  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << " Delete Results previous existing result:" << resultsName
351  << " of type " << type << Endl;
352  delete it->second;
353  resultsForType.erase(it->first);
354  }
355  else {
356  Log() << kINFO << Form("Dataset[%s] : ",fdsi->GetName()) << "could not fine Result class of " << resultsName
357  << " of type " << type << " which I should have deleted" << Endl;
358  }
359 }
360 ////////////////////////////////////////////////////////////////////////////////
361 /// divide training set
362 
364 {
366  // not changing anything ??
367  if (fBlockBelongToTraining.size() == blockNum) return;
368  // storing the original training vector
369  if (fBlockBelongToTraining.size() == 1) {
370  if (fEventCollection[tOrg].size() == 0)
371  fEventCollection[tOrg].resize(fEventCollection[tTrn].size());
372  fEventCollection[tOrg].clear();
373  for (UInt_t i=0; i<fEventCollection[tTrn].size(); i++)
374  fEventCollection[tOrg].push_back(fEventCollection[tTrn][i]);
375  fClassEvents[tOrg] = fClassEvents[tTrn];
376  }
377  //reseting the event division vector
378  fBlockBelongToTraining.clear();
379  for (UInt_t i=0 ; i < blockNum ; i++) fBlockBelongToTraining.push_back(kTRUE);
380 
382 }
383 
384 ////////////////////////////////////////////////////////////////////////////////
385 /// apply division of data set
386 
388 {
390  fEventCollection[tTrn].clear();
391  if (fEventCollection[tVld].size()==0)
392  fEventCollection[tVld].resize(fEventCollection[tOrg].size());
393  fEventCollection[tVld].clear();
394 
395  //creating the new events collections, notice that the events that can't be evenly divided belong to the last event
396  for (UInt_t i=0; i<fEventCollection[tOrg].size(); i++) {
398  fEventCollection[tTrn].push_back(fEventCollection[tOrg][i]);
399  else
400  fEventCollection[tVld].push_back(fEventCollection[tOrg][i]);
401  }
402 }
403 
404 ////////////////////////////////////////////////////////////////////////////////
405 /// move training block
406 
408 {
409  if (dest == Types::kValidation)
410  fBlockBelongToTraining[blockInd]=kFALSE;
411  else
412  fBlockBelongToTraining[blockInd]=kTRUE;
413  if (applyChanges) ApplyTrainingSetDivision();
414 }
415 
416 ////////////////////////////////////////////////////////////////////////////////
417 /// return number of signal test events in dataset
418 
420 {
421  return GetNClassEvents(Types::kTesting, fdsi->GetClassInfo("Signal")->GetNumber() );
422 }
423 
424 ////////////////////////////////////////////////////////////////////////////////
425 /// return number of background test events in dataset
426 
428 {
429  return GetNClassEvents(Types::kTesting, fdsi->GetClassInfo("Background")->GetNumber() );
430 }
431 
432 ////////////////////////////////////////////////////////////////////////////////
433 /// return number of signal training events in dataset
434 
436 {
438 }
439 
440 ////////////////////////////////////////////////////////////////////////////////
441 /// return number of background training events in dataset
442 
444 {
445  return GetNClassEvents(Types::kTraining, fdsi->GetClassInfo("Background")->GetNumber() );
446 }
447 
448 ////////////////////////////////////////////////////////////////////////////////
449 /// initialize random or importance sampling
450 
451 void TMVA::DataSet::InitSampling( Float_t fraction, Float_t weight, UInt_t seed )
452 {
453  // add a random generator if not yet present
454  if (fSamplingRandom == 0 ) fSamplingRandom = new TRandom3( seed );
455 
456  // first, clear the lists
457  std::vector< std::pair< Float_t, Long64_t >* > evtList;
458  std::vector< std::pair< Float_t, Long64_t >* >::iterator it;
459 
460  Int_t treeIdx = TreeIndex( GetCurrentType() );
461 
462  if (fSamplingEventList.size() < UInt_t(treeIdx+1) ) fSamplingEventList.resize(treeIdx+1);
463  if (fSamplingSelected.size() < UInt_t(treeIdx+1) ) fSamplingSelected.resize(treeIdx+1);
464 // for (it = fSamplingEventList.at(treeIdx).begin(); it != fSamplingEventList.at(treeIdx).end(); it++ ) delete (*it);
465  fSamplingEventList.at(treeIdx).clear();
466  fSamplingSelected.at(treeIdx).clear();
467 
468  if (fSampling.size() < UInt_t(treeIdx+1) ) fSampling.resize(treeIdx+1);
469  if (fSamplingNEvents.size() < UInt_t(treeIdx+1) ) fSamplingNEvents.resize(treeIdx+1);
470  if (fSamplingWeight.size() < UInt_t(treeIdx+1) ) fSamplingWeight.resize(treeIdx+1);
471 
472  if (fraction > 0.999999 || fraction < 0.0000001) {
473  fSampling.at( treeIdx ) = false;
474  fSamplingNEvents.at( treeIdx ) = 0;
475  fSamplingWeight.at( treeIdx ) = 1.0;
476  return;
477  }
478 
479  // for the initialization, the sampling has to be turned off, afterwards we will turn it on
480  fSampling.at( treeIdx ) = false;
481 
482  fSamplingNEvents.at( treeIdx ) = Int_t(fraction*GetNEvents());
483  fSamplingWeight.at( treeIdx ) = weight;
484 
485  Long64_t nEvts = GetNEvents();
486  fSamplingEventList.at( treeIdx ).reserve( nEvts );
487  fSamplingSelected.at( treeIdx ).reserve( fSamplingNEvents.at(treeIdx) );
488  for (Long64_t ievt=0; ievt<nEvts; ievt++) {
489  std::pair<Float_t,Long64_t> p(1.0,ievt);
490  fSamplingEventList.at( treeIdx ).push_back( p );
491  }
492 
493  // now turn sampling on
494  fSampling.at( treeIdx ) = true;
495 }
496 
497 
498 ////////////////////////////////////////////////////////////////////////////////
499 /// create an event sampling (random or importance sampling)
500 
502 {
503  Int_t treeIdx = TreeIndex( GetCurrentType() );
504 
505  if (!fSampling.at(treeIdx) ) return;
506 
507  if (fSamplingRandom == 0 )
508  Log() << kFATAL<< Form("Dataset[%s] : ",fdsi->GetName())
509  << "no random generator present for creating a random/importance sampling (initialized?)" << Endl;
510 
511  // delete the previous selection
512  fSamplingSelected.at(treeIdx).clear();
513 
514  // create a temporary event-list
515  std::vector< std::pair< Float_t, Long64_t > > evtList;
516  std::vector< std::pair< Float_t, Long64_t > >::iterator evtListIt;
517 
518  // some variables
519  Float_t sumWeights = 0;
520 
521  // make a copy of the event-list
522  evtList.assign( fSamplingEventList.at(treeIdx).begin(), fSamplingEventList.at(treeIdx).end() );
523 
524  // sum up all the weights (internal weights for importance sampling)
525  for (evtListIt = evtList.begin(); evtListIt != evtList.end(); evtListIt++) {
526  sumWeights += (*evtListIt).first;
527  }
528  evtListIt = evtList.begin();
529 
530  // random numbers
531  std::vector< Float_t > rnds;
532  rnds.reserve(fSamplingNEvents.at(treeIdx));
533 
534  Float_t pos = 0;
535  for (Int_t i = 0; i < fSamplingNEvents.at(treeIdx); i++) {
536  pos = fSamplingRandom->Rndm()*sumWeights;
537  rnds.push_back( pos );
538  }
539 
540  // sort the random numbers
541  std::sort(rnds.begin(),rnds.end());
542 
543  // select the events according to the random numbers
544  std::vector< Float_t >::iterator rndsIt = rnds.begin();
545  Float_t runningSum = 0.000000001;
546  for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
547  runningSum += (*evtListIt).first;
548  if (runningSum >= (*rndsIt)) {
549  fSamplingSelected.at(treeIdx).push_back( (*evtListIt) );
550  evtListIt = evtList.erase( evtListIt );
551 
552  rndsIt++;
553  if (rndsIt == rnds.end() ) break;
554  }
555  else {
556  evtListIt++;
557  }
558  }
559 }
560 
561 ////////////////////////////////////////////////////////////////////////////////
562 /// increase the importance sampling weight of the event
563 /// when not successful and decrease it when successful
564 
565 void TMVA::DataSet::EventResult( Bool_t successful, Long64_t evtNumber )
566 {
567 
568  if (!fSampling.at(fCurrentTreeIdx)) return;
569  if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999) return;
570 
571  Long64_t start = 0;
572  Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
573  if (evtNumber >= 0) {
574  start = evtNumber;
575  stop = evtNumber;
576  }
577  for ( Long64_t iEvt = start; iEvt <= stop; iEvt++ ){
578  if (Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) < iEvt) {
579  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "event number (" << iEvt
580  << ") larger than number of sampled events ("
581  << fSamplingEventList.at(fCurrentTreeIdx).size() << " of tree " << fCurrentTreeIdx << ")" << Endl;
582  return;
583  }
584  Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first;
585  if (!successful) {
586  // weight /= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
587  weight /= fSamplingWeight.at(fCurrentTreeIdx);
588  if (weight > 1.0 ) weight = 1.0;
589  }
590  else {
591  // weight *= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
592  weight *= fSamplingWeight.at(fCurrentTreeIdx);
593  }
594  fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first = weight;
595  }
596 }
597 
598 
599 ////////////////////////////////////////////////////////////////////////////////
600 /// create the test/trainings tree with all the variables, the weights, the classes, the targets, the spectators, the MVA outputs
601 
603 {
604  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "GetTree(" << ( type==Types::kTraining ? "training" : "testing" ) << ")" << Endl;
605 
606  // the dataset does not hold the tree, this function returns a new tree everytime it is called
607 
608  if (type!=Types::kTraining && type!=Types::kTesting) return 0;
609 
610  Types::ETreeType savedType = GetCurrentType();
611 
612  SetCurrentType(type);
613  const UInt_t t = TreeIndex(type);
614  if (fResults.size() <= t) {
615  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "No results for treetype " << ( type==Types::kTraining ? "training" : "testing" )
616  << " found. Size=" << fResults.size() << Endl;
617  }
618 
619  // return number of background training events in dataset
620  TString treeName( (type == Types::kTraining ? "TrainTree" : "TestTree" ) );
621  TTree *tree = new TTree(treeName,treeName);
622 
623  Float_t *varVals = new Float_t[fdsi->GetNVariables()];
624  Float_t *tgtVals = new Float_t[fdsi->GetNTargets()];
625  Float_t *visVals = new Float_t[fdsi->GetNSpectators()];
626 
627  UInt_t cls;
628  Float_t weight;
629  // TObjString *className = new TObjString();
630  char *className = new char[40];
631 
632 
633  //Float_t metVals[fResults.at(t).size()][Int_t(fdsi->GetNTargets()+1)];
634  // replace by: [Joerg]
635  Float_t **metVals = new Float_t*[fResults.at(t).size()];
636  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
637  metVals[i] = new Float_t[fdsi->GetNTargets()+fdsi->GetNClasses()];
638 
639  // create branches for event-variables
640  tree->Branch( "classID", &cls, "classID/I" );
641  tree->Branch( "className",(void*)className, "className/C" );
642 
643  // create all branches for the variables
644  Int_t n = 0;
645  for (std::vector<VariableInfo>::const_iterator itVars = fdsi->GetVariableInfos().begin();
646  itVars != fdsi->GetVariableInfos().end(); itVars++) {
647 
648  // has to be changed to take care of types different than float: TODO
649  tree->Branch( (*itVars).GetInternalName(), &varVals[n], (*itVars).GetInternalName()+TString("/F") );
650  n++;
651  }
652  // create the branches for the targets
653  n = 0;
654  for (std::vector<VariableInfo>::const_iterator itTgts = fdsi->GetTargetInfos().begin();
655  itTgts != fdsi->GetTargetInfos().end(); itTgts++) {
656  // has to be changed to take care of types different than float: TODO
657  tree->Branch( (*itTgts).GetInternalName(), &tgtVals[n], (*itTgts).GetInternalName()+TString("/F") );
658  n++;
659  }
660  // create the branches for the spectator variables
661  n = 0;
662  for (std::vector<VariableInfo>::const_iterator itVis = fdsi->GetSpectatorInfos().begin();
663  itVis != fdsi->GetSpectatorInfos().end(); itVis++) {
664  // has to be changed to take care of types different than float: TODO
665  tree->Branch( (*itVis).GetInternalName(), &visVals[n], (*itVis).GetInternalName()+TString("/F") );
666  n++;
667  }
668 
669  tree->Branch( "weight", &weight, "weight/F" );
670 
671  // create all the branches for the results
672  n = 0;
673  for (std::map< TString, Results* >::iterator itMethod = fResults.at(t).begin();
674  itMethod != fResults.at(t).end(); itMethod++) {
675 
676 
677  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "analysis type: " << (itMethod->second->GetAnalysisType()==Types::kRegression ? "Regression" :
678  (itMethod->second->GetAnalysisType()==Types::kMulticlass ? "Multiclass" : "Classification" )) << Endl;
679 
680  if (itMethod->second->GetAnalysisType() == Types::kClassification) {
681  // classification
682  tree->Branch( itMethod->first, &(metVals[n][0]), itMethod->first + "/F" );
683  }
684  else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
685  // multiclass classification
686  TString leafList("");
687  for (UInt_t iCls = 0; iCls < fdsi->GetNClasses(); iCls++) {
688  if (iCls > 0) leafList.Append( ":" );
689  leafList.Append( fdsi->GetClassInfo( iCls )->GetName() );
690  leafList.Append( "/F" );
691  }
692  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
693  << leafList << " itMethod->second " << itMethod->second << Endl;
694  tree->Branch( itMethod->first, (metVals[n]), leafList );
695  }
696  else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
697  // regression
698  TString leafList("");
699  for (UInt_t iTgt = 0; iTgt < fdsi->GetNTargets(); iTgt++) {
700  if (iTgt > 0) leafList.Append( ":" );
701  leafList.Append( fdsi->GetTargetInfo( iTgt ).GetInternalName() );
702  // leafList.Append( fdsi->GetTargetInfo( iTgt ).GetLabel() );
703  leafList.Append( "/F" );
704  }
705  Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
706  << leafList << " itMethod->second " << itMethod->second << Endl;
707  tree->Branch( itMethod->first, (metVals[n]), leafList );
708  }
709  else {
710  Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "Unknown analysis type for result found when writing TestTree." << Endl;
711  }
712  n++;
713 
714  }
715 
716  // loop through all the events
717  for (Long64_t iEvt = 0; iEvt < GetNEvents( type ); iEvt++) {
718  // write the event-variables
719  const Event* ev = GetEvent( iEvt );
720  // write the classnumber and the classname
721  cls = ev->GetClass();
722  weight = ev->GetWeight();
723  TString tmp = fdsi->GetClassInfo( cls )->GetName();
724  for (Int_t itmp = 0; itmp < tmp.Sizeof(); itmp++) {
725  className[itmp] = tmp(itmp);
726  className[itmp+1] = 0;
727  }
728 
729  // write the variables, targets and spectator variables
730  for (UInt_t ivar = 0; ivar < ev->GetNVariables(); ivar++) varVals[ivar] = ev->GetValue( ivar );
731  for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) tgtVals[itgt] = ev->GetTarget( itgt );
732  for (UInt_t ivis = 0; ivis < ev->GetNSpectators(); ivis++) visVals[ivis] = ev->GetSpectator( ivis );
733 
734 
735  // loop through all the results and write the branches
736  n=0;
737  for (std::map<TString, Results*>::iterator itMethod = fResults.at(t).begin();
738  itMethod != fResults.at(t).end(); itMethod++) {
739  Results* results = itMethod->second;
740 
741  const std::vector< Float_t >& vals = results->operator[](iEvt);
742 
743  if (itMethod->second->GetAnalysisType() == Types::kClassification) {
744  // classification
745  metVals[n][0] = vals[0];
746  }
747  else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
748  // multiclass classification
749  for (UInt_t nCls = 0, nClsEnd=fdsi->GetNClasses(); nCls < nClsEnd; nCls++) {
750  Float_t val = vals.at(nCls);
751  metVals[n][nCls] = val;
752  }
753  }
754  else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
755  // regression
756  for (UInt_t nTgts = 0; nTgts < fdsi->GetNTargets(); nTgts++) {
757  Float_t val = vals.at(nTgts);
758  metVals[n][nTgts] = val;
759  }
760  }
761  n++;
762  }
763  // fill the variables into the tree
764  tree->Fill();
765  }
766 
767  Log() << kHEADER //<< Form("[%s] : ",fdsi.GetName())
768  << "Created tree '" << tree->GetName() << "' with " << tree->GetEntries() << " events" << Endl << Endl;
769 
770  SetCurrentType(savedType);
771 
772  delete[] varVals;
773  delete[] tgtVals;
774  delete[] visVals;
775 
776  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
777  delete[] metVals[i];
778  delete[] metVals;
779 
780  delete[] className;
781 
782  return tree;
783 }
784 
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:140
UInt_t GetNVariables() const
Definition: DataSetInfo.h:128
Random number generator class based on M.
Definition: TRandom3.h:29
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
long long Long64_t
Definition: RtypesCore.h:69
Long64_t fTrainingBlockSize
Definition: DataSet.h:192
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingSelected
Definition: DataSet.h:176
const TString & GetInternalName() const
Definition: VariableInfo.h:66
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Definition: DataSet.cxx:250
std::vector< VariableInfo > & GetSpectatorInfos()
Definition: DataSetInfo.h:122
virtual Double_t Rndm()
Machine independent random number generator.
Definition: TRandom3.cxx:94
float Float_t
Definition: RtypesCore.h:53
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingEventList
Definition: DataSet.h:175
std::vector< std::vector< Event * > > fEventCollection
Definition: DataSet.h:164
void CreateSampling() const
create an event sampling (random or importance sampling)
Definition: DataSet.cxx:501
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4375
TRandom3 * fSamplingRandom
Definition: DataSet.h:177
EAnalysisType
Definition: Types.h:129
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:225
void SetTreeType(Types::ETreeType type)
Definition: Results.h:73
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< Char_t > fBlockBelongToTraining
Definition: DataSet.h:188
const Bool_t kFALSE
Definition: Rtypes.h:92
void ClearNClassEvents(Int_t type)
Definition: DataSet.cxx:169
UInt_t GetNClasses() const
Definition: DataSetInfo.h:154
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:443
UInt_t TreeIndex(Types::ETreeType type) const
Definition: DataSet.h:204
UInt_t GetNSpectators() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:241
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:33
virtual ~DataSet()
destructor
Definition: DataSet.cxx:132
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:602
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:217
UInt_t GetClass() const
Definition: Event.h:89
TString & Append(const char *cs)
Definition: TString.h:492
std::vector< std::vector< double > > Data
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:378
std::vector< VariableInfo > & GetTargetInfos()
Definition: DataSetInfo.h:117
Bool_t fHasNegativeEventWeights
Definition: DataSet.h:184
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:316
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
Definition: DataSet.cxx:407
void ApplyTrainingSetDivision()
apply division of data set
Definition: DataSet.cxx:387
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:104
UInt_t GetNTargets() const
Definition: DataSetInfo.h:129
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
Definition: DataSet.cxx:286
std::vector< std::vector< Long64_t > > fClassEvents
Definition: DataSet.h:181
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:419
ClassInfo * GetClassInfo(Int_t clNum) const
VariableInfo & GetTargetInfo(Int_t i)
Definition: DataSetInfo.h:119
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particulary Method instance (here appareantly called resultsName i...
Definition: DataSet.cxx:337
UInt_t GetNSpectators() const
accessor to the number of spectators
Definition: Event.cxx:324
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void DivideTrainingSet(UInt_t blockNum)
divide training set
Definition: DataSet.cxx:363
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
Definition: DataSet.cxx:198
UInt_t GetNSpectators(bool all=kTRUE) const
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:427
const DataSetInfo * fdsi
Definition: DataSet.h:162
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:305
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:233
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:160
std::vector< Char_t > fSampling
Definition: DataSet.h:172
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
Definition: DataSet.cxx:565
std::vector< Float_t > fSamplingWeight
Definition: DataSet.h:174
Long64_t fCurrentEventIdx
Definition: DataSet.h:169
UInt_t fCurrentTreeIdx
[train/test/...][method-identifier]
Definition: DataSet.h:168
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:435
MsgLogger & Log() const
message logger
Definition: DataSet.h:187
int type
Definition: TGX11.cxx:120
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
Definition: TString.cxx:1298
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:114
void SetEventCollection(std::vector< Event *> *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
Definition: DataSet.cxx:259
std::vector< Int_t > fSamplingNEvents
Definition: DataSet.h:173
UInt_t GetNumber() const
Definition: ClassInfo.h:73
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:85
virtual Long64_t GetEntries() const
Definition: TTree.h:393
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:177
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1652
MsgLogger * fLogger
Definition: DataSet.h:186
#define dest(otri, vertexptr)
Definition: triangle.c:1040
#define NULL
Definition: Rtypes.h:82
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:229
Definition: tree.py:1
A TTree object has a header with a name and a title.
Definition: TTree.h:98
const Bool_t kTRUE
Definition: Rtypes.h:91
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:233
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Definition: Event.cxx:258
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
Definition: DataSet.cxx:451
const Int_t n
Definition: legend1.C:16
const Event * GetEvent() const
Definition: DataSet.cxx:211
std::vector< VariableInfo > & GetVariableInfos()
Definition: DataSetInfo.h:112
std::vector< std::map< TString, Results *> > fResults
Definition: DataSet.h:166