ROOT  6.06/09
Reference Guide
DataSet.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSet *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17  * *
18  * Copyright (c) 2006: *
19  * CERN, Switzerland *
20  * MPI-K Heidelberg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #include <vector>
28 #include <algorithm>
29 #include <cstdlib>
30 #include <stdexcept>
31 #include <algorithm>
32 
33 #ifndef ROOT_TMVA_DataSetInfo
34 #include "TMVA/DataSetInfo.h"
35 #endif
36 #ifndef ROOT_TMVA_DataSet
37 #include "TMVA/DataSet.h"
38 #endif
39 #ifndef ROOT_TMVA_Event
40 #include "TMVA/Event.h"
41 #endif
42 #ifndef ROOT_TMVA_MsgLogger
43 #include "TMVA/MsgLogger.h"
44 #endif
45 #ifndef ROOT_TMVA_ResultsRegression
46 #include "TMVA/ResultsRegression.h"
47 #endif
48 #ifndef ROOT_TMVA_ResultsClassification
50 #endif
51 #ifndef ROOT_TMVA_ResultsMulticlass
52 #include "TMVA/ResultsMulticlass.h"
53 #endif
54 #ifndef ROOT_TMVA_Configurable
55 #include "TMVA/Configurable.h"
56 #endif
57 
58 ////////////////////////////////////////////////////////////////////////////////
59 /// constructor
60 
62  : fdsi(dsi),
63  fEventCollection(4,(std::vector<Event*>*)0),
64  fCurrentTreeIdx(0),
65  fCurrentEventIdx(0),
66  fHasNegativeEventWeights(kFALSE),
67  fLogger( new MsgLogger(TString(TString("Dataset:")+dsi.GetName()).Data()) ),
68  fTrainingBlockSize(0)
69 {
70  for (UInt_t i=0; i<4; i++) fEventCollection[i] = new std::vector<Event*>;
71 
72  fClassEvents.resize(4);
73  fBlockBelongToTraining.reserve(10);
74  fBlockBelongToTraining.push_back(kTRUE);
75 
76  // sampling
77  fSamplingRandom = 0;
78 
79  Int_t treeNum = 2;
80  fSampling.resize( treeNum );
81  fSamplingNEvents.resize( treeNum );
82  fSamplingWeight.resize(treeNum);
83 
84  for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
85  fSampling.at(treeIdx) = kFALSE;
86  fSamplingNEvents.at(treeIdx) = 0;
87  fSamplingWeight.at(treeIdx) = 1.0;
88  }
89 }
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// destructor
93 
95 {
96  // delete event collection
97  Bool_t deleteEvents=true; // dataset owns the events /JS
98  DestroyCollection( Types::kTraining, deleteEvents );
99  DestroyCollection( Types::kTesting, deleteEvents );
100 
101  fBlockBelongToTraining.clear();
102  // delete results
103  for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); it++) {
104  for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); itMap++) {
105  delete itMap->second;
106  }
107  }
108 
109  // delete sampling
110  if (fSamplingRandom != 0 ) delete fSamplingRandom;
111 
112  std::vector< std::pair< Float_t, Long64_t >* >::iterator itEv;
113  std::vector< std::vector<std::pair< Float_t, Long64_t >* > >::iterator treeIt;
114  for (treeIt = fSamplingEventList.begin(); treeIt != fSamplingEventList.end(); treeIt++ ) {
115  for (itEv = (*treeIt).begin(); itEv != (*treeIt).end(); itEv++) {
116  delete (*itEv);
117  }
118  }
119 
120  // need also to delete fEventCollections[2] and [3], not sure if they are used
121  DestroyCollection( Types::kValidation, deleteEvents );
122  DestroyCollection( Types::kTrainingOriginal, deleteEvents );
123 
124  delete fLogger;
125 }
126 
127 ////////////////////////////////////////////////////////////////////////////////
128 
130 {
131  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
132  if (fClassEvents.at( type ).size() < classNumber+1) fClassEvents.at( type ).resize( classNumber+1 );
133  fClassEvents.at( type ).at( classNumber ) += 1;
134 }
135 
136 ////////////////////////////////////////////////////////////////////////////////
137 
139 {
140  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
141  fClassEvents.at( type ).clear();
142 }
143 
144 ////////////////////////////////////////////////////////////////////////////////
145 
147 {
148  try {
149  return fClassEvents.at(type).at(classNumber);
150  }
151  catch (std::out_of_range excpt) {
152  ClassInfo* ci = fdsi.GetClassInfo( classNumber );
153  Log() << kFATAL << "No " << (type==0?"training":(type==1?"testing":"_unknown_type_"))
154  << " events for class " << (ci==NULL?"_no_name_known_":ci->GetName().Data()) << " (index # "<<classNumber<<")"
155  << " available. Check if all class names are spelled correctly and if events are"
156  << " passing the selection cuts." << Endl;
157  }
158  catch (...) {
159  Log() << kFATAL << "ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" << Endl;
160  }
161  return 0;
162 }
163 
164 ////////////////////////////////////////////////////////////////////////////////
165 /// destroys the event collection (events + vector)
166 
168 {
169  UInt_t i = TreeIndex(type);
170  if (i>=fEventCollection.size() || fEventCollection[i]==0) return;
171  if (deleteEvents) {
172  for (UInt_t j=0; j<fEventCollection[i]->size(); j++) delete (*fEventCollection[i])[j];
173  }
174  delete fEventCollection[i];
175  fEventCollection[i]=0;
176 }
177 
178 ////////////////////////////////////////////////////////////////////////////////
179 
181 {
182  if (fSampling.size() > UInt_t(fCurrentTreeIdx) && fSampling.at(fCurrentTreeIdx)) {
183  Long64_t iEvt = fSamplingSelected.at(fCurrentTreeIdx).at( fCurrentEventIdx )->second;
184  return (*(fEventCollection.at(fCurrentTreeIdx))).at(iEvt);
185  }
186  else {
187  return (*(fEventCollection.at(fCurrentTreeIdx))).at(fCurrentEventIdx);
188  }
189 }
190 
191 ////////////////////////////////////////////////////////////////////////////////
192 /// access the number of variables through the datasetinfo
193 
195 {
196  return fdsi.GetNVariables();
197 }
198 
199 ////////////////////////////////////////////////////////////////////////////////
200 /// access the number of targets through the datasetinfo
201 
203 {
204  return fdsi.GetNTargets();
205 }
206 
207 ////////////////////////////////////////////////////////////////////////////////
208 /// access the number of spectators through the datasetinfo
209 
211 {
212  return fdsi.GetNSpectators();
213 }
214 
215 ////////////////////////////////////////////////////////////////////////////////
216 /// add event to event list
217 /// after which the event is owned by the dataset
218 
220 {
221  fEventCollection.at(Int_t(type))->push_back(ev);
222  if (ev->GetWeight()<0) fHasNegativeEventWeights = kTRUE;
223  fEvtCollIt=fEventCollection.at(fCurrentTreeIdx)->begin();
224 }
225 
226 ////////////////////////////////////////////////////////////////////////////////
227 /// Sets the event collection (by DataSetFactory)
228 
229 void TMVA::DataSet::SetEventCollection(std::vector<TMVA::Event*>* events, Types::ETreeType type)
230 {
231  Bool_t deleteEvents = true;
232  DestroyCollection(type,deleteEvents);
233 
234  const Int_t t = TreeIndex(type);
235  ClearNClassEvents( type );
236  fEventCollection.at(t) = events;
237  for (std::vector<Event*>::iterator it = fEventCollection.at(t)->begin(); it < fEventCollection.at(t)->end(); it++) {
238  IncrementNClassEvents( t, (*it)->GetClass() );
239  }
240  fEvtCollIt=fEventCollection.at(fCurrentTreeIdx)->begin();
241 }
242 
243 ////////////////////////////////////////////////////////////////////////////////
244 /// TString info(resultsName+"/");
245 /// switch(type) {
246 /// case Types::kTraining: info += "kTraining/"; break;
247 /// case Types::kTesting: info += "kTesting/"; break;
248 /// default: break;
249 /// }
250 /// switch(analysistype) {
251 /// case Types::kClassification: info += "kClassification"; break;
252 /// case Types::kRegression: info += "kRegression"; break;
253 /// case Types::kNoAnalysisType: info += "kNoAnalysisType"; break;
254 /// case Types::kMaxAnalysisType:info += "kMaxAnalysisType"; break;
255 /// }
256 
259  Types::EAnalysisType analysistype )
260 {
261  UInt_t t = TreeIndex(type);
262  if (t<fResults.size()) {
263  const std::map< TString, Results* >& resultsForType = fResults[t];
264  std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
265  if (it!=resultsForType.end()) {
266  //Log() << kINFO << " GetResults("<<info<<") returns existing result." << Endl;
267  return it->second;
268  }
269  }
270  else {
271  fResults.resize(t+1);
272  }
273 
274  // nothing found
275 
276  Results * newresults = 0;
277  switch(analysistype) {
279  newresults = new ResultsClassification(&fdsi,resultsName);
280  break;
281  case Types::kRegression:
282  newresults = new ResultsRegression(&fdsi,resultsName);
283  break;
284  case Types::kMulticlass:
285  newresults = new ResultsMulticlass(&fdsi,resultsName);
286  break;
288  newresults = new ResultsClassification(&fdsi,resultsName);
289  break;
291  //Log() << kINFO << " GetResults("<<info<<") can't create new one." << Endl;
292  return 0;
293  break;
294  }
295 
296  newresults->SetTreeType( type );
297  fResults[t][resultsName] = newresults;
298 
299  //Log() << kINFO << " GetResults("<<info<<") builds new result." << Endl;
300  return newresults;
301 }
302 ////////////////////////////////////////////////////////////////////////////////
303 /// delete the results stored for this particular
304 /// Method instance (here apparently called resultsName instead of MethodTitle),
305 /// Tree type (Training, testing etc..)
306 /// Analysis Type (Classification, Multiclass, Regression etc..)
307 
308 void TMVA::DataSet::DeleteResults( const TString & resultsName,
310  Types::EAnalysisType /* analysistype */ )
311 {
312  if (fResults.empty()) return;
313 
314  if (UInt_t(type) > fResults.size()){
315  Log()<<kFATAL<< "you asked for an Treetype (training/testing/...)"
316  << " whose index " << type << " does not exist " << Endl;
317  }
318  std::map< TString, Results* >& resultsForType = fResults[UInt_t(type)];
319  std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
320  if (it!=resultsForType.end()) {
321  Log() << kDEBUG << " Delete Results previous existing result:" << resultsName
322  << " of type " << type << Endl;
323  delete it->second;
324  resultsForType.erase(it->first);
325  }
326  else {
327  Log() << kINFO << "could not fine Result class of " << resultsName
328  << " of type " << type << " which I should have deleted" << Endl;
329  }
330 }
331 ////////////////////////////////////////////////////////////////////////////////
332 /// divide training set
333 
335 {
336  Int_t tOrg = TreeIndex(Types::kTrainingOriginal),tTrn = TreeIndex(Types::kTraining);
337  // not changing anything ??
338  if (fBlockBelongToTraining.size() == blockNum) return;
339  // storing the original training vector
340  if (fBlockBelongToTraining.size() == 1) {
341  if (fEventCollection[tOrg] == 0)
342  fEventCollection[tOrg]=new std::vector<TMVA::Event*>(fEventCollection[tTrn]->size());
343  fEventCollection[tOrg]->clear();
344  for (UInt_t i=0; i<fEventCollection[tTrn]->size(); i++)
345  fEventCollection[tOrg]->push_back((*fEventCollection[tTrn])[i]);
346  fClassEvents[tOrg] = fClassEvents[tTrn];
347  }
348  //reseting the event division vector
349  fBlockBelongToTraining.clear();
350  for (UInt_t i=0 ; i < blockNum ; i++) fBlockBelongToTraining.push_back(kTRUE);
351 
352  ApplyTrainingSetDivision();
353 }
354 
355 ////////////////////////////////////////////////////////////////////////////////
356 /// apply division of data set
357 
359 {
360  Int_t tOrg = TreeIndex(Types::kTrainingOriginal), tTrn = TreeIndex(Types::kTraining), tVld = TreeIndex(Types::kValidation);
361  fEventCollection[tTrn]->clear();
362  if (fEventCollection[tVld]==0)
363  fEventCollection[tVld] = new std::vector<TMVA::Event*>(fEventCollection[tOrg]->size());
364  fEventCollection[tVld]->clear();
365 
366  //creating the new events collections, notice that the events that can't be evenly divided belong to the last event
367  for (UInt_t i=0; i<fEventCollection[tOrg]->size(); i++) {
368  if (fBlockBelongToTraining[i % fBlockBelongToTraining.size()])
369  fEventCollection[tTrn]->push_back((*fEventCollection[tOrg])[i]);
370  else
371  fEventCollection[tVld]->push_back((*fEventCollection[tOrg])[i]);
372  }
373 }
374 
375 ////////////////////////////////////////////////////////////////////////////////
376 /// move training block
377 
379 {
380  if (dest == Types::kValidation)
381  fBlockBelongToTraining[blockInd]=kFALSE;
382  else
383  fBlockBelongToTraining[blockInd]=kTRUE;
384  if (applyChanges) ApplyTrainingSetDivision();
385 }
386 
387 ////////////////////////////////////////////////////////////////////////////////
388 /// return number of signal test events in dataset
389 
391 {
392  return GetNClassEvents(Types::kTesting, fdsi.GetClassInfo("Signal")->GetNumber() );
393 }
394 
395 ////////////////////////////////////////////////////////////////////////////////
396 /// return number of background test events in dataset
397 
399 {
400  return GetNClassEvents(Types::kTesting, fdsi.GetClassInfo("Background")->GetNumber() );
401 }
402 
403 ////////////////////////////////////////////////////////////////////////////////
404 /// return number of signal training events in dataset
405 
407 {
408  return GetNClassEvents(Types::kTraining, fdsi.GetClassInfo("Signal")->GetNumber() );
409 }
410 
411 ////////////////////////////////////////////////////////////////////////////////
412 /// return number of background training events in dataset
413 
415 {
416  return GetNClassEvents(Types::kTraining, fdsi.GetClassInfo("Background")->GetNumber() );
417 }
418 
419 ////////////////////////////////////////////////////////////////////////////////
420 /// initialize random or importance sampling
421 
422 void TMVA::DataSet::InitSampling( Float_t fraction, Float_t weight, UInt_t seed )
423 {
424  // add a random generator if not yet present
425  if (fSamplingRandom == 0 ) fSamplingRandom = new TRandom3( seed );
426 
427  // first, clear the lists
428  std::vector< std::pair< Float_t, Long64_t >* > evtList;
429  std::vector< std::pair< Float_t, Long64_t >* >::iterator it;
430 
431  Int_t treeIdx = TreeIndex( GetCurrentType() );
432 
433  if (fSamplingEventList.size() < UInt_t(treeIdx+1) ) fSamplingEventList.resize(treeIdx+1);
434  if (fSamplingSelected.size() < UInt_t(treeIdx+1) ) fSamplingSelected.resize(treeIdx+1);
435  for (it = fSamplingEventList.at(treeIdx).begin(); it != fSamplingEventList.at(treeIdx).end(); it++ ) delete (*it);
436  fSamplingEventList.at(treeIdx).clear();
437  fSamplingSelected.at(treeIdx).clear();
438 
439  if (fSampling.size() < UInt_t(treeIdx+1) ) fSampling.resize(treeIdx+1);
440  if (fSamplingNEvents.size() < UInt_t(treeIdx+1) ) fSamplingNEvents.resize(treeIdx+1);
441  if (fSamplingWeight.size() < UInt_t(treeIdx+1) ) fSamplingWeight.resize(treeIdx+1);
442 
443  if (fraction > 0.999999 || fraction < 0.0000001) {
444  fSampling.at( treeIdx ) = false;
445  fSamplingNEvents.at( treeIdx ) = 0;
446  fSamplingWeight.at( treeIdx ) = 1.0;
447  return;
448  }
449 
450  // for the initialization, the sampling has to be turned off, afterwards we will turn it on
451  fSampling.at( treeIdx ) = false;
452 
453  fSamplingNEvents.at( treeIdx ) = Int_t(fraction*GetNEvents());
454  fSamplingWeight.at( treeIdx ) = weight;
455 
456  Long64_t nEvts = GetNEvents();
457  fSamplingEventList.at( treeIdx ).reserve( nEvts );
458  fSamplingSelected.at( treeIdx ).reserve( fSamplingNEvents.at(treeIdx) );
459  for (Long64_t ievt=0; ievt<nEvts; ievt++) {
460  std::pair<Float_t,Long64_t> *p = new std::pair<Float_t,Long64_t>(1.0,ievt);
461  fSamplingEventList.at( treeIdx ).push_back( p );
462  }
463 
464  // now turn sampling on
465  fSampling.at( treeIdx ) = true;
466 }
467 
468 
469 ////////////////////////////////////////////////////////////////////////////////
470 /// create an event sampling (random or importance sampling)
471 
473 {
474  Int_t treeIdx = TreeIndex( GetCurrentType() );
475 
476  if (!fSampling.at(treeIdx) ) return;
477 
478  if (fSamplingRandom == 0 )
479  Log() << kFATAL
480  << "no random generator present for creating a random/importance sampling (initialized?)" << Endl;
481 
482  // delete the previous selection
483  fSamplingSelected.at(treeIdx).clear();
484 
485  // create a temporary event-list
486  std::vector< std::pair< Float_t, Long64_t >* > evtList;
487  std::vector< std::pair< Float_t, Long64_t >* >::iterator evtListIt;
488 
489  // some variables
490  Float_t sumWeights = 0;
491 
492  // make a copy of the event-list
493  evtList.assign( fSamplingEventList.at(treeIdx).begin(), fSamplingEventList.at(treeIdx).end() );
494 
495  // sum up all the weights (internal weights for importance sampling)
496  for (evtListIt = evtList.begin(); evtListIt != evtList.end(); evtListIt++) {
497  sumWeights += (*evtListIt)->first;
498  }
499  evtListIt = evtList.begin();
500 
501  // random numbers
502  std::vector< Float_t > rnds;
503  rnds.reserve(fSamplingNEvents.at(treeIdx));
504 
505  Float_t pos = 0;
506  for (Int_t i = 0; i < fSamplingNEvents.at(treeIdx); i++) {
507  pos = fSamplingRandom->Rndm()*sumWeights;
508  rnds.push_back( pos );
509  }
510 
511  // sort the random numbers
512  std::sort(rnds.begin(),rnds.end());
513 
514  // select the events according to the random numbers
515  std::vector< Float_t >::iterator rndsIt = rnds.begin();
516  Float_t runningSum = 0.000000001;
517  for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
518  runningSum += (*evtListIt)->first;
519  if (runningSum >= (*rndsIt)) {
520  fSamplingSelected.at(treeIdx).push_back( (*evtListIt) );
521  evtListIt = evtList.erase( evtListIt );
522 
523  rndsIt++;
524  if (rndsIt == rnds.end() ) break;
525  }
526  else {
527  evtListIt++;
528  }
529  }
530 }
531 
532 ////////////////////////////////////////////////////////////////////////////////
533 /// increase the importance sampling weight of the event
534 /// when not successful and decrease it when successful
535 
536 void TMVA::DataSet::EventResult( Bool_t successful, Long64_t evtNumber )
537 {
538 
539  if (!fSampling.at(fCurrentTreeIdx)) return;
540  if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999) return;
541 
542  Long64_t start = 0;
543  Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
544  if (evtNumber >= 0) {
545  start = evtNumber;
546  stop = evtNumber;
547  }
548  for ( Long64_t iEvt = start; iEvt <= stop; iEvt++ ){
549  if (Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) < iEvt) {
550  Log() << kWARNING << "event number (" << iEvt
551  << ") larger than number of sampled events ("
552  << fSamplingEventList.at(fCurrentTreeIdx).size() << " of tree " << fCurrentTreeIdx << ")" << Endl;
553  return;
554  }
555  Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at( iEvt )->first;
556  if (!successful) {
557  // weight /= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
558  weight /= fSamplingWeight.at(fCurrentTreeIdx);
559  if (weight > 1.0 ) weight = 1.0;
560  }
561  else {
562  // weight *= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
563  weight *= fSamplingWeight.at(fCurrentTreeIdx);
564  }
565  fSamplingEventList.at(fCurrentTreeIdx).at( iEvt )->first = weight;
566  }
567 }
568 
569 
570 ////////////////////////////////////////////////////////////////////////////////
571 /// create the test/trainings tree with all the variables, the weights, the classes, the targets, the spectators, the MVA outputs
572 
574 {
575  Log() << kDEBUG << "GetTree(" << ( type==Types::kTraining ? "training" : "testing" ) << ")" << Endl;
576 
577  // the dataset does not hold the tree, this function returns a new tree everytime it is called
578 
579  if (type!=Types::kTraining && type!=Types::kTesting) return 0;
580 
581  Types::ETreeType savedType = GetCurrentType();
582 
583  SetCurrentType(type);
584  const UInt_t t = TreeIndex(type);
585  if (fResults.size() <= t) {
586  Log() << kWARNING << "No results for treetype " << ( type==Types::kTraining ? "training" : "testing" )
587  << " found. Size=" << fResults.size() << Endl;
588  }
589 
590  // return number of background training events in dataset
591  TString treeName( (type == Types::kTraining ? "TrainTree" : "TestTree" ) );
592  TTree *tree = new TTree(treeName,treeName);
593 
594  Float_t *varVals = new Float_t[fdsi.GetNVariables()];
595  Float_t *tgtVals = new Float_t[fdsi.GetNTargets()];
596  Float_t *visVals = new Float_t[fdsi.GetNSpectators()];
597 
598  UInt_t cls;
599  Float_t weight;
600  // TObjString *className = new TObjString();
601  char *className = new char[40];
602 
603 
604  //Float_t metVals[fResults.at(t).size()][Int_t(fdsi.GetNTargets()+1)];
605  // replace by: [Joerg]
606  Float_t **metVals = new Float_t*[fResults.at(t).size()];
607  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
608  metVals[i] = new Float_t[fdsi.GetNTargets()+fdsi.GetNClasses()];
609 
610  // create branches for event-variables
611  tree->Branch( "classID", &cls, "classID/I" );
612  tree->Branch( "className",(void*)className, "className/C" );
613 
614  // create all branches for the variables
615  Int_t n = 0;
616  for (std::vector<VariableInfo>::const_iterator itVars = fdsi.GetVariableInfos().begin();
617  itVars != fdsi.GetVariableInfos().end(); itVars++) {
618 
619  // has to be changed to take care of types different than float: TODO
620  tree->Branch( (*itVars).GetInternalName(), &varVals[n], (*itVars).GetInternalName()+TString("/F") );
621  n++;
622  }
623  // create the branches for the targets
624  n = 0;
625  for (std::vector<VariableInfo>::const_iterator itTgts = fdsi.GetTargetInfos().begin();
626  itTgts != fdsi.GetTargetInfos().end(); itTgts++) {
627  // has to be changed to take care of types different than float: TODO
628  tree->Branch( (*itTgts).GetInternalName(), &tgtVals[n], (*itTgts).GetInternalName()+TString("/F") );
629  n++;
630  }
631  // create the branches for the spectator variables
632  n = 0;
633  for (std::vector<VariableInfo>::const_iterator itVis = fdsi.GetSpectatorInfos().begin();
634  itVis != fdsi.GetSpectatorInfos().end(); itVis++) {
635  // has to be changed to take care of types different than float: TODO
636  tree->Branch( (*itVis).GetInternalName(), &visVals[n], (*itVis).GetInternalName()+TString("/F") );
637  n++;
638  }
639 
640  tree->Branch( "weight", &weight, "weight/F" );
641 
642  // create all the branches for the results
643  n = 0;
644  for (std::map< TString, Results* >::iterator itMethod = fResults.at(t).begin();
645  itMethod != fResults.at(t).end(); itMethod++) {
646 
647 
648  Log() << kDEBUG << "analysis type: " << (itMethod->second->GetAnalysisType()==Types::kRegression ? "Regression" :
649  (itMethod->second->GetAnalysisType()==Types::kMulticlass ? "Multiclass" : "Classification" )) << Endl;
650 
651  if (itMethod->second->GetAnalysisType() == Types::kClassification) {
652  // classification
653  tree->Branch( itMethod->first, &(metVals[n][0]), itMethod->first + "/F" );
654  }
655  else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
656  // multiclass classification
657  TString leafList("");
658  for (UInt_t iCls = 0; iCls < fdsi.GetNClasses(); iCls++) {
659  if (iCls > 0) leafList.Append( ":" );
660  leafList.Append( fdsi.GetClassInfo( iCls )->GetName() );
661  leafList.Append( "/F" );
662  }
663  Log() << kDEBUG << "itMethod->first " << itMethod->first << " LEAFLIST: "
664  << leafList << " itMethod->second " << itMethod->second << Endl;
665  tree->Branch( itMethod->first, (metVals[n]), leafList );
666  }
667  else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
668  // regression
669  TString leafList("");
670  for (UInt_t iTgt = 0; iTgt < fdsi.GetNTargets(); iTgt++) {
671  if (iTgt > 0) leafList.Append( ":" );
672  leafList.Append( fdsi.GetTargetInfo( iTgt ).GetInternalName() );
673  // leafList.Append( fdsi.GetTargetInfo( iTgt ).GetLabel() );
674  leafList.Append( "/F" );
675  }
676  Log() << kDEBUG << "itMethod->first " << itMethod->first << " LEAFLIST: "
677  << leafList << " itMethod->second " << itMethod->second << Endl;
678  tree->Branch( itMethod->first, (metVals[n]), leafList );
679  }
680  else {
681  Log() << kWARNING << "Unknown analysis type for result found when writing TestTree." << Endl;
682  }
683  n++;
684 
685  }
686 
687  // loop through all the events
688  for (Long64_t iEvt = 0; iEvt < GetNEvents( type ); iEvt++) {
689  // write the event-variables
690  const Event* ev = GetEvent( iEvt );
691  // write the classnumber and the classname
692  cls = ev->GetClass();
693  weight = ev->GetWeight();
694  TString tmp = fdsi.GetClassInfo( cls )->GetName();
695  for (Int_t itmp = 0; itmp < tmp.Sizeof(); itmp++) {
696  className[itmp] = tmp(itmp);
697  className[itmp+1] = 0;
698  }
699 
700  // write the variables, targets and spectator variables
701  for (UInt_t ivar = 0; ivar < ev->GetNVariables(); ivar++) varVals[ivar] = ev->GetValue( ivar );
702  for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) tgtVals[itgt] = ev->GetTarget( itgt );
703  for (UInt_t ivis = 0; ivis < ev->GetNSpectators(); ivis++) visVals[ivis] = ev->GetSpectator( ivis );
704 
705 
706  // loop through all the results and write the branches
707  n=0;
708  for (std::map<TString, Results*>::iterator itMethod = fResults.at(t).begin();
709  itMethod != fResults.at(t).end(); itMethod++) {
710  Results* results = itMethod->second;
711 
712  const std::vector< Float_t >& vals = results->operator[](iEvt);
713 
714  if (itMethod->second->GetAnalysisType() == Types::kClassification) {
715  // classification
716  metVals[n][0] = vals[0];
717  }
718  else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
719  // multiclass classification
720  for (UInt_t nCls = 0, nClsEnd=fdsi.GetNClasses(); nCls < nClsEnd; nCls++) {
721  Float_t val = vals.at(nCls);
722  metVals[n][nCls] = val;
723  }
724  }
725  else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
726  // regression
727  for (UInt_t nTgts = 0; nTgts < fdsi.GetNTargets(); nTgts++) {
728  Float_t val = vals.at(nTgts);
729  metVals[n][nTgts] = val;
730  }
731  }
732  n++;
733  }
734  // fill the variables into the tree
735  tree->Fill();
736  }
737 
738  Log() << kINFO << "Created tree '" << tree->GetName() << "' with " << tree->GetEntries() << " events" << Endl;
739 
740  SetCurrentType(savedType);
741 
742  delete[] varVals;
743  delete[] tgtVals;
744  delete[] visVals;
745 
746  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
747  delete[] metVals[i];
748  delete[] metVals;
749 
750  delete[] className;
751 
752  return tree;
753 }
754 
UInt_t GetNSpectators() const
access the number of spectators through the datasetinfo
Definition: DataSet.cxx:210
void SetEventCollection(std::vector< Event * > *, Types::ETreeType)
Sets the event collection (by DataSetFactory)
Definition: DataSet.cxx:229
Random number generator class based on M.
Definition: TRandom3.h:29
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
long long Long64_t
Definition: RtypesCore.h:69
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Definition: DataSet.cxx:219
float Float_t
Definition: RtypesCore.h:53
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:314
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Definition: Event.cxx:256
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4328
TRandom3 * fSamplingRandom
Definition: DataSet.h:176
EAnalysisType
Definition: Types.h:124
void SetTreeType(Types::ETreeType type)
Definition: Results.h:72
Basic string class.
Definition: TString.h:137
UInt_t GetNSpectators() const
accessor to the number of spectators
Definition: Event.cxx:322
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< Char_t > fBlockBelongToTraining
Definition: DataSet.h:187
const Bool_t kFALSE
Definition: Rtypes.h:92
void ClearNClassEvents(Int_t type)
Definition: DataSet.cxx:138
STL namespace.
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:376
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:231
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:414
const char * Data() const
Definition: TString.h:349
virtual ~DataSet()
destructor
Definition: DataSet.cxx:94
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:573
TString & Append(const char *cs)
Definition: TString.h:492
std::vector< std::vector< double > > Data
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:303
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
Definition: DataSet.cxx:378
void ApplyTrainingSetDivision()
apply division of data set
Definition: DataSet.cxx:358
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
Definition: DataSet.cxx:257
std::vector< std::vector< Long64_t > > fClassEvents
Definition: DataSet.h:180
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:390
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance (here apparently called resultsName i...
Definition: DataSet.cxx:308
unsigned int UInt_t
Definition: RtypesCore.h:42
void DivideTrainingSet(UInt_t blockNum)
divide training set
Definition: DataSet.cxx:334
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
Definition: DataSet.cxx:167
const TString & GetName() const
Definition: ClassInfo.h:72
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:398
void CreateSampling() const
create an event sampling (random or importance sampling)
Definition: DataSet.cxx:472
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:129
const Event * GetEvent() const
Definition: DataSet.cxx:180
std::vector< Char_t > fSampling
Definition: DataSet.h:171
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
Definition: DataSet.cxx:536
std::vector< Float_t > fSamplingWeight
Definition: DataSet.h:173
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:406
int type
Definition: TGX11.cxx:120
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:202
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:194
std::vector< Int_t > fSamplingNEvents
Definition: DataSet.h:172
UInt_t GetClass() const
Definition: Event.h:86
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:146
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:101
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1624
#define dest(otri, vertexptr)
Definition: triangle.c:1040
#define NULL
Definition: Rtypes.h:82
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
Definition: TString.cxx:1297
virtual Long64_t GetEntries() const
Definition: TTree.h:382
A TTree object has a header with a name and a title.
Definition: TTree.h:94
const Bool_t kTRUE
Definition: Rtypes.h:91
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
Definition: DataSet.cxx:422
const Int_t n
Definition: legend1.C:16
Definition: math.cpp:60