ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
DataSet.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSet *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17  * *
18  * Copyright (c) 2006: *
19  * CERN, Switzerland *
20  * MPI-K Heidelberg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #include <vector>
28 #include <algorithm>
29 #include <cstdlib>
30 #include <stdexcept>
31 #include <algorithm>
32 
33 #ifndef ROOT_TMVA_DataSetInfo
34 #include "TMVA/DataSetInfo.h"
35 #endif
36 #ifndef ROOT_TMVA_DataSet
37 #include "TMVA/DataSet.h"
38 #endif
39 #ifndef ROOT_TMVA_Event
40 #include "TMVA/Event.h"
41 #endif
42 #ifndef ROOT_TMVA_MsgLogger
43 #include "TMVA/MsgLogger.h"
44 #endif
45 #ifndef ROOT_TMVA_ResultsRegression
46 #include "TMVA/ResultsRegression.h"
47 #endif
48 #ifndef ROOT_TMVA_ResultsClassification
50 #endif
51 #ifndef ROOT_TMVA_ResultsMulticlass
52 #include "TMVA/ResultsMulticlass.h"
53 #endif
54 #ifndef ROOT_TMVA_Configurable
55 #include "TMVA/Configurable.h"
56 #endif
57 
58 #include "TMVA/Types.h"
59 #include "TMVA/Results.h"
60 #include "TMVA/VariableInfo.h"
61 
62 #include "TRandom3.h"
63 
64 ////////////////////////////////////////////////////////////////////////////////
65 /// constructor
66 
68  : fdsi(dsi),
69  fEventCollection(4,(std::vector<Event*>*)0),
70  fCurrentTreeIdx(0),
71  fCurrentEventIdx(0),
72  fHasNegativeEventWeights(kFALSE),
73  fLogger( new MsgLogger(TString(TString("Dataset:")+dsi.GetName()).Data()) ),
74  fTrainingBlockSize(0)
75 {
76  for (UInt_t i=0; i<4; i++) fEventCollection[i] = new std::vector<Event*>;
77 
78  fClassEvents.resize(4);
79  fBlockBelongToTraining.reserve(10);
80  fBlockBelongToTraining.push_back(kTRUE);
81 
82  // sampling
83  fSamplingRandom = 0;
84 
85  Int_t treeNum = 2;
86  fSampling.resize( treeNum );
87  fSamplingNEvents.resize( treeNum );
88  fSamplingWeight.resize(treeNum);
89 
90  for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
91  fSampling.at(treeIdx) = kFALSE;
92  fSamplingNEvents.at(treeIdx) = 0;
93  fSamplingWeight.at(treeIdx) = 1.0;
94  }
95 }
96 
97 ////////////////////////////////////////////////////////////////////////////////
98 /// destructor
99 
101 {
102  // delete event collection
103  Bool_t deleteEvents=true; // dataset owns the events /JS
104  DestroyCollection( Types::kTraining, deleteEvents );
105  DestroyCollection( Types::kTesting, deleteEvents );
106 
107  fBlockBelongToTraining.clear();
108  // delete results
109  for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); it++) {
110  for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); itMap++) {
111  delete itMap->second;
112  }
113  }
114 
115  // delete sampling
116  if (fSamplingRandom != 0 ) delete fSamplingRandom;
117 
118  std::vector< std::pair< Float_t, Long64_t >* >::iterator itEv;
119  std::vector< std::vector<std::pair< Float_t, Long64_t >* > >::iterator treeIt;
120  for (treeIt = fSamplingEventList.begin(); treeIt != fSamplingEventList.end(); treeIt++ ) {
121  for (itEv = (*treeIt).begin(); itEv != (*treeIt).end(); itEv++) {
122  delete (*itEv);
123  }
124  }
125 
126  // need also to delete fEventCollections[2] and [3], not sure if they are used
127  DestroyCollection( Types::kValidation, deleteEvents );
128  DestroyCollection( Types::kTrainingOriginal, deleteEvents );
129 
130  delete fLogger;
131 }
132 
133 ////////////////////////////////////////////////////////////////////////////////
134 
136 {
137  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
138  if (fClassEvents.at( type ).size() < classNumber+1) fClassEvents.at( type ).resize( classNumber+1 );
139  fClassEvents.at( type ).at( classNumber ) += 1;
140 }
141 
142 ////////////////////////////////////////////////////////////////////////////////
143 
145 {
146  if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
147  fClassEvents.at( type ).clear();
148 }
149 
150 ////////////////////////////////////////////////////////////////////////////////
151 
153 {
154  try {
155  return fClassEvents.at(type).at(classNumber);
156  }
157  catch (std::out_of_range excpt) {
158  ClassInfo* ci = fdsi.GetClassInfo( classNumber );
159  Log() << kFATAL << "No " << (type==0?"training":(type==1?"testing":"_unknown_type_"))
160  << " events for class " << (ci==NULL?"_no_name_known_":ci->GetName().Data()) << " (index # "<<classNumber<<")"
161  << " available. Check if all class names are spelled correctly and if events are"
162  << " passing the selection cuts." << Endl;
163  }
164  catch (...) {
165  Log() << kFATAL << "ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" << Endl;
166  }
167  return 0;
168 }
169 
170 ////////////////////////////////////////////////////////////////////////////////
171 /// destroys the event collection (events + vector)
172 
174 {
175  UInt_t i = TreeIndex(type);
176  if (i>=fEventCollection.size() || fEventCollection[i]==0) return;
177  if (deleteEvents) {
178  for (UInt_t j=0; j<fEventCollection[i]->size(); j++) delete (*fEventCollection[i])[j];
179  }
180  delete fEventCollection[i];
181  fEventCollection[i]=0;
182 }
183 
184 ////////////////////////////////////////////////////////////////////////////////
185 
187 {
188  if (fSampling.size() > UInt_t(fCurrentTreeIdx) && fSampling.at(fCurrentTreeIdx)) {
189  Long64_t iEvt = fSamplingSelected.at(fCurrentTreeIdx).at( fCurrentEventIdx )->second;
190  return (*(fEventCollection.at(fCurrentTreeIdx))).at(iEvt);
191  }
192  else {
193  return (*(fEventCollection.at(fCurrentTreeIdx))).at(fCurrentEventIdx);
194  }
195 }
196 
197 ////////////////////////////////////////////////////////////////////////////////
198 /// access the number of variables through the datasetinfo
199 
201 {
202  return fdsi.GetNVariables();
203 }
204 
205 ////////////////////////////////////////////////////////////////////////////////
206 /// access the number of targets through the datasetinfo
207 
209 {
210  return fdsi.GetNTargets();
211 }
212 
213 ////////////////////////////////////////////////////////////////////////////////
214 /// access the number of targets through the datasetinfo
215 
217 {
218  return fdsi.GetNSpectators();
219 }
220 
221 ////////////////////////////////////////////////////////////////////////////////
222 /// add event to event list
223 /// after which the event is owned by the dataset
224 
226 {
227  fEventCollection.at(Int_t(type))->push_back(ev);
228  if (ev->GetWeight()<0) fHasNegativeEventWeights = kTRUE;
229  fEvtCollIt=fEventCollection.at(fCurrentTreeIdx)->begin();
230 }
231 
232 ////////////////////////////////////////////////////////////////////////////////
233 /// Sets the event collection (by DataSetFactory)
234 
235 void TMVA::DataSet::SetEventCollection(std::vector<TMVA::Event*>* events, Types::ETreeType type)
236 {
237  Bool_t deleteEvents = true;
238  DestroyCollection(type,deleteEvents);
239 
240  const Int_t t = TreeIndex(type);
241  ClearNClassEvents( type );
242  fEventCollection.at(t) = events;
243  for (std::vector<Event*>::iterator it = fEventCollection.at(t)->begin(); it < fEventCollection.at(t)->end(); it++) {
244  IncrementNClassEvents( t, (*it)->GetClass() );
245  }
246  fEvtCollIt=fEventCollection.at(fCurrentTreeIdx)->begin();
247 }
248 
249 ////////////////////////////////////////////////////////////////////////////////
250 /// TString info(resultsName+"/");
251 /// switch(type) {
252 /// case Types::kTraining: info += "kTraining/"; break;
253 /// case Types::kTesting: info += "kTesting/"; break;
254 /// default: break;
255 /// }
256 /// switch(analysistype) {
257 /// case Types::kClassification: info += "kClassification"; break;
258 /// case Types::kRegression: info += "kRegression"; break;
259 /// case Types::kNoAnalysisType: info += "kNoAnalysisType"; break;
260 /// case Types::kMaxAnalysisType:info += "kMaxAnalysisType"; break;
261 /// }
262 
265  Types::EAnalysisType analysistype )
266 {
267  UInt_t t = TreeIndex(type);
268  if (t<fResults.size()) {
269  const std::map< TString, Results* >& resultsForType = fResults[t];
270  std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
271  if (it!=resultsForType.end()) {
272  //Log() << kINFO << " GetResults("<<info<<") returns existing result." << Endl;
273  return it->second;
274  }
275  }
276  else {
277  fResults.resize(t+1);
278  }
279 
280  // nothing found
281 
282  Results * newresults = 0;
283  switch(analysistype) {
285  newresults = new ResultsClassification(&fdsi,resultsName);
286  break;
287  case Types::kRegression:
288  newresults = new ResultsRegression(&fdsi,resultsName);
289  break;
290  case Types::kMulticlass:
291  newresults = new ResultsMulticlass(&fdsi,resultsName);
292  break;
294  newresults = new ResultsClassification(&fdsi,resultsName);
295  break;
297  //Log() << kINFO << " GetResults("<<info<<") can't create new one." << Endl;
298  return 0;
299  break;
300  }
301 
302  newresults->SetTreeType( type );
303  fResults[t][resultsName] = newresults;
304 
305  //Log() << kINFO << " GetResults("<<info<<") builds new result." << Endl;
306  return newresults;
307 }
308 ////////////////////////////////////////////////////////////////////////////////
309 /// delete the results stored for this particulary
310 /// Method instance (here appareantly called resultsName instead of MethodTitle
311 /// Tree type (Training, testing etc..)
312 /// Analysis Type (Classification, Multiclass, Regression etc..)
313 
314 void TMVA::DataSet::DeleteResults( const TString & resultsName,
316  Types::EAnalysisType /* analysistype */ )
317 {
318  if (fResults.empty()) return;
319 
320  if (UInt_t(type) > fResults.size()){
321  Log()<<kFATAL<< "you asked for an Treetype (training/testing/...)"
322  << " whose index " << type << " does not exist " << Endl;
323  }
324  std::map< TString, Results* >& resultsForType = fResults[UInt_t(type)];
325  std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
326  if (it!=resultsForType.end()) {
327  Log() << kDEBUG << " Delete Results previous existing result:" << resultsName
328  << " of type " << type << Endl;
329  delete it->second;
330  resultsForType.erase(it->first);
331  }
332  else {
333  Log() << kINFO << "could not fine Result class of " << resultsName
334  << " of type " << type << " which I should have deleted" << Endl;
335  }
336 }
337 ////////////////////////////////////////////////////////////////////////////////
338 /// divide training set
339 
341 {
342  Int_t tOrg = TreeIndex(Types::kTrainingOriginal),tTrn = TreeIndex(Types::kTraining);
343  // not changing anything ??
344  if (fBlockBelongToTraining.size() == blockNum) return;
345  // storing the original training vector
346  if (fBlockBelongToTraining.size() == 1) {
347  if (fEventCollection[tOrg] == 0)
348  fEventCollection[tOrg]=new std::vector<TMVA::Event*>(fEventCollection[tTrn]->size());
349  fEventCollection[tOrg]->clear();
350  for (UInt_t i=0; i<fEventCollection[tTrn]->size(); i++)
351  fEventCollection[tOrg]->push_back((*fEventCollection[tTrn])[i]);
352  fClassEvents[tOrg] = fClassEvents[tTrn];
353  }
354  //reseting the event division vector
355  fBlockBelongToTraining.clear();
356  for (UInt_t i=0 ; i < blockNum ; i++) fBlockBelongToTraining.push_back(kTRUE);
357 
358  ApplyTrainingSetDivision();
359 }
360 
361 ////////////////////////////////////////////////////////////////////////////////
362 /// apply division of data set
363 
365 {
366  Int_t tOrg = TreeIndex(Types::kTrainingOriginal), tTrn = TreeIndex(Types::kTraining), tVld = TreeIndex(Types::kValidation);
367  fEventCollection[tTrn]->clear();
368  if (fEventCollection[tVld]==0)
369  fEventCollection[tVld] = new std::vector<TMVA::Event*>(fEventCollection[tOrg]->size());
370  fEventCollection[tVld]->clear();
371 
372  //creating the new events collections, notice that the events that can't be evenly divided belong to the last event
373  for (UInt_t i=0; i<fEventCollection[tOrg]->size(); i++) {
374  if (fBlockBelongToTraining[i % fBlockBelongToTraining.size()])
375  fEventCollection[tTrn]->push_back((*fEventCollection[tOrg])[i]);
376  else
377  fEventCollection[tVld]->push_back((*fEventCollection[tOrg])[i]);
378  }
379 }
380 
381 ////////////////////////////////////////////////////////////////////////////////
382 /// move training block
383 
385 {
386  if (dest == Types::kValidation)
387  fBlockBelongToTraining[blockInd]=kFALSE;
388  else
389  fBlockBelongToTraining[blockInd]=kTRUE;
390  if (applyChanges) ApplyTrainingSetDivision();
391 }
392 
393 ////////////////////////////////////////////////////////////////////////////////
394 /// return number of signal test events in dataset
395 
397 {
398  return GetNClassEvents(Types::kTesting, fdsi.GetClassInfo("Signal")->GetNumber() );
399 }
400 
401 ////////////////////////////////////////////////////////////////////////////////
402 /// return number of background test events in dataset
403 
405 {
406  return GetNClassEvents(Types::kTesting, fdsi.GetClassInfo("Background")->GetNumber() );
407 }
408 
409 ////////////////////////////////////////////////////////////////////////////////
410 /// return number of signal training events in dataset
411 
413 {
414  return GetNClassEvents(Types::kTraining, fdsi.GetClassInfo("Signal")->GetNumber() );
415 }
416 
417 ////////////////////////////////////////////////////////////////////////////////
418 /// return number of background training events in dataset
419 
421 {
422  return GetNClassEvents(Types::kTraining, fdsi.GetClassInfo("Background")->GetNumber() );
423 }
424 
425 ////////////////////////////////////////////////////////////////////////////////
426 /// initialize random or importance sampling
427 
428 void TMVA::DataSet::InitSampling( Float_t fraction, Float_t weight, UInt_t seed )
429 {
430  // add a random generator if not yet present
431  if (fSamplingRandom == 0 ) fSamplingRandom = new TRandom3( seed );
432 
433  // first, clear the lists
434  std::vector< std::pair< Float_t, Long64_t >* > evtList;
435  std::vector< std::pair< Float_t, Long64_t >* >::iterator it;
436 
437  Int_t treeIdx = TreeIndex( GetCurrentType() );
438 
439  if (fSamplingEventList.size() < UInt_t(treeIdx+1) ) fSamplingEventList.resize(treeIdx+1);
440  if (fSamplingSelected.size() < UInt_t(treeIdx+1) ) fSamplingSelected.resize(treeIdx+1);
441  for (it = fSamplingEventList.at(treeIdx).begin(); it != fSamplingEventList.at(treeIdx).end(); it++ ) delete (*it);
442  fSamplingEventList.at(treeIdx).clear();
443  fSamplingSelected.at(treeIdx).clear();
444 
445  if (fSampling.size() < UInt_t(treeIdx+1) ) fSampling.resize(treeIdx+1);
446  if (fSamplingNEvents.size() < UInt_t(treeIdx+1) ) fSamplingNEvents.resize(treeIdx+1);
447  if (fSamplingWeight.size() < UInt_t(treeIdx+1) ) fSamplingWeight.resize(treeIdx+1);
448 
449  if (fraction > 0.999999 || fraction < 0.0000001) {
450  fSampling.at( treeIdx ) = false;
451  fSamplingNEvents.at( treeIdx ) = 0;
452  fSamplingWeight.at( treeIdx ) = 1.0;
453  return;
454  }
455 
456  // for the initialization, the sampling has to be turned off, afterwards we will turn it on
457  fSampling.at( treeIdx ) = false;
458 
459  fSamplingNEvents.at( treeIdx ) = Int_t(fraction*GetNEvents());
460  fSamplingWeight.at( treeIdx ) = weight;
461 
462  Long64_t nEvts = GetNEvents();
463  fSamplingEventList.at( treeIdx ).reserve( nEvts );
464  fSamplingSelected.at( treeIdx ).reserve( fSamplingNEvents.at(treeIdx) );
465  for (Long64_t ievt=0; ievt<nEvts; ievt++) {
466  std::pair<Float_t,Long64_t> *p = new std::pair<Float_t,Long64_t>(1.0,ievt);
467  fSamplingEventList.at( treeIdx ).push_back( p );
468  }
469 
470  // now turn sampling on
471  fSampling.at( treeIdx ) = true;
472 }
473 
474 
475 ////////////////////////////////////////////////////////////////////////////////
476 /// create an event sampling (random or importance sampling)
477 
479 {
480  Int_t treeIdx = TreeIndex( GetCurrentType() );
481 
482  if (!fSampling.at(treeIdx) ) return;
483 
484  if (fSamplingRandom == 0 )
485  Log() << kFATAL
486  << "no random generator present for creating a random/importance sampling (initialized?)" << Endl;
487 
488  // delete the previous selection
489  fSamplingSelected.at(treeIdx).clear();
490 
491  // create a temporary event-list
492  std::vector< std::pair< Float_t, Long64_t >* > evtList;
493  std::vector< std::pair< Float_t, Long64_t >* >::iterator evtListIt;
494 
495  // some variables
496  Float_t sumWeights = 0;
497 
498  // make a copy of the event-list
499  evtList.assign( fSamplingEventList.at(treeIdx).begin(), fSamplingEventList.at(treeIdx).end() );
500 
501  // sum up all the weights (internal weights for importance sampling)
502  for (evtListIt = evtList.begin(); evtListIt != evtList.end(); evtListIt++) {
503  sumWeights += (*evtListIt)->first;
504  }
505  evtListIt = evtList.begin();
506 
507  // random numbers
508  std::vector< Float_t > rnds;
509  rnds.reserve(fSamplingNEvents.at(treeIdx));
510 
511  Float_t pos = 0;
512  for (Int_t i = 0; i < fSamplingNEvents.at(treeIdx); i++) {
513  pos = fSamplingRandom->Rndm()*sumWeights;
514  rnds.push_back( pos );
515  }
516 
517  // sort the random numbers
518  std::sort(rnds.begin(),rnds.end());
519 
520  // select the events according to the random numbers
521  std::vector< Float_t >::iterator rndsIt = rnds.begin();
522  Float_t runningSum = 0.000000001;
523  for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
524  runningSum += (*evtListIt)->first;
525  if (runningSum >= (*rndsIt)) {
526  fSamplingSelected.at(treeIdx).push_back( (*evtListIt) );
527  evtListIt = evtList.erase( evtListIt );
528 
529  rndsIt++;
530  if (rndsIt == rnds.end() ) break;
531  }
532  else {
533  evtListIt++;
534  }
535  }
536 }
537 
538 ////////////////////////////////////////////////////////////////////////////////
539 /// increase the importance sampling weight of the event
540 /// when not successful and decrease it when successful
541 
542 void TMVA::DataSet::EventResult( Bool_t successful, Long64_t evtNumber )
543 {
544 
545  if (!fSampling.at(fCurrentTreeIdx)) return;
546  if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999) return;
547 
548  Long64_t start = 0;
549  Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
550  if (evtNumber >= 0) {
551  start = evtNumber;
552  stop = evtNumber;
553  }
554  for ( Long64_t iEvt = start; iEvt <= stop; iEvt++ ){
555  if (Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) < iEvt) {
556  Log() << kWARNING << "event number (" << iEvt
557  << ") larger than number of sampled events ("
558  << fSamplingEventList.at(fCurrentTreeIdx).size() << " of tree " << fCurrentTreeIdx << ")" << Endl;
559  return;
560  }
561  Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at( iEvt )->first;
562  if (!successful) {
563  // weight /= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
564  weight /= fSamplingWeight.at(fCurrentTreeIdx);
565  if (weight > 1.0 ) weight = 1.0;
566  }
567  else {
568  // weight *= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
569  weight *= fSamplingWeight.at(fCurrentTreeIdx);
570  }
571  fSamplingEventList.at(fCurrentTreeIdx).at( iEvt )->first = weight;
572  }
573 }
574 
575 
576 ////////////////////////////////////////////////////////////////////////////////
577 /// create the test/trainings tree with all the variables, the weights, the classes, the targets, the spectators, the MVA outputs
578 
580 {
581  Log() << kDEBUG << "GetTree(" << ( type==Types::kTraining ? "training" : "testing" ) << ")" << Endl;
582 
583  // the dataset does not hold the tree, this function returns a new tree everytime it is called
584 
585  if (type!=Types::kTraining && type!=Types::kTesting) return 0;
586 
587  Types::ETreeType savedType = GetCurrentType();
588 
589  SetCurrentType(type);
590  const UInt_t t = TreeIndex(type);
591  if (fResults.size() <= t) {
592  Log() << kWARNING << "No results for treetype " << ( type==Types::kTraining ? "training" : "testing" )
593  << " found. Size=" << fResults.size() << Endl;
594  }
595 
596  // return number of background training events in dataset
597  TString treeName( (type == Types::kTraining ? "TrainTree" : "TestTree" ) );
598  TTree *tree = new TTree(treeName,treeName);
599 
600  Float_t *varVals = new Float_t[fdsi.GetNVariables()];
601  Float_t *tgtVals = new Float_t[fdsi.GetNTargets()];
602  Float_t *visVals = new Float_t[fdsi.GetNSpectators()];
603 
604  UInt_t cls;
605  Float_t weight;
606  // TObjString *className = new TObjString();
607  char *className = new char[40];
608 
609 
610  //Float_t metVals[fResults.at(t).size()][Int_t(fdsi.GetNTargets()+1)];
611  // replace by: [Joerg]
612  Float_t **metVals = new Float_t*[fResults.at(t).size()];
613  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
614  metVals[i] = new Float_t[fdsi.GetNTargets()+fdsi.GetNClasses()];
615 
616  // create branches for event-variables
617  tree->Branch( "classID", &cls, "classID/I" );
618  tree->Branch( "className",(void*)className, "className/C" );
619 
620  // create all branches for the variables
621  Int_t n = 0;
622  for (std::vector<VariableInfo>::const_iterator itVars = fdsi.GetVariableInfos().begin();
623  itVars != fdsi.GetVariableInfos().end(); itVars++) {
624 
625  // has to be changed to take care of types different than float: TODO
626  tree->Branch( (*itVars).GetInternalName(), &varVals[n], (*itVars).GetInternalName()+TString("/F") );
627  n++;
628  }
629  // create the branches for the targets
630  n = 0;
631  for (std::vector<VariableInfo>::const_iterator itTgts = fdsi.GetTargetInfos().begin();
632  itTgts != fdsi.GetTargetInfos().end(); itTgts++) {
633  // has to be changed to take care of types different than float: TODO
634  tree->Branch( (*itTgts).GetInternalName(), &tgtVals[n], (*itTgts).GetInternalName()+TString("/F") );
635  n++;
636  }
637  // create the branches for the spectator variables
638  n = 0;
639  for (std::vector<VariableInfo>::const_iterator itVis = fdsi.GetSpectatorInfos().begin();
640  itVis != fdsi.GetSpectatorInfos().end(); itVis++) {
641  // has to be changed to take care of types different than float: TODO
642  tree->Branch( (*itVis).GetInternalName(), &visVals[n], (*itVis).GetInternalName()+TString("/F") );
643  n++;
644  }
645 
646  tree->Branch( "weight", &weight, "weight/F" );
647 
648  // create all the branches for the results
649  n = 0;
650  for (std::map< TString, Results* >::iterator itMethod = fResults.at(t).begin();
651  itMethod != fResults.at(t).end(); itMethod++) {
652 
653 
654  Log() << kDEBUG << "analysis type: " << (itMethod->second->GetAnalysisType()==Types::kRegression ? "Regression" :
655  (itMethod->second->GetAnalysisType()==Types::kMulticlass ? "Multiclass" : "Classification" )) << Endl;
656 
657  if (itMethod->second->GetAnalysisType() == Types::kClassification) {
658  // classification
659  tree->Branch( itMethod->first, &(metVals[n][0]), itMethod->first + "/F" );
660  }
661  else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
662  // multiclass classification
663  TString leafList("");
664  for (UInt_t iCls = 0; iCls < fdsi.GetNClasses(); iCls++) {
665  if (iCls > 0) leafList.Append( ":" );
666  leafList.Append( fdsi.GetClassInfo( iCls )->GetName() );
667  leafList.Append( "/F" );
668  }
669  Log() << kDEBUG << "itMethod->first " << itMethod->first << " LEAFLIST: "
670  << leafList << " itMethod->second " << itMethod->second << Endl;
671  tree->Branch( itMethod->first, (metVals[n]), leafList );
672  }
673  else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
674  // regression
675  TString leafList("");
676  for (UInt_t iTgt = 0; iTgt < fdsi.GetNTargets(); iTgt++) {
677  if (iTgt > 0) leafList.Append( ":" );
678  leafList.Append( fdsi.GetTargetInfo( iTgt ).GetInternalName() );
679  // leafList.Append( fdsi.GetTargetInfo( iTgt ).GetLabel() );
680  leafList.Append( "/F" );
681  }
682  Log() << kDEBUG << "itMethod->first " << itMethod->first << " LEAFLIST: "
683  << leafList << " itMethod->second " << itMethod->second << Endl;
684  tree->Branch( itMethod->first, (metVals[n]), leafList );
685  }
686  else {
687  Log() << kWARNING << "Unknown analysis type for result found when writing TestTree." << Endl;
688  }
689  n++;
690 
691  }
692 
693  // loop through all the events
694  for (Long64_t iEvt = 0; iEvt < GetNEvents( type ); iEvt++) {
695  // write the event-variables
696  const Event* ev = GetEvent( iEvt );
697  // write the classnumber and the classname
698  cls = ev->GetClass();
699  weight = ev->GetWeight();
700  TString tmp = fdsi.GetClassInfo( cls )->GetName();
701  for (Int_t itmp = 0; itmp < tmp.Sizeof(); itmp++) {
702  className[itmp] = tmp(itmp);
703  className[itmp+1] = 0;
704  }
705 
706  // write the variables, targets and spectator variables
707  for (UInt_t ivar = 0; ivar < ev->GetNVariables(); ivar++) varVals[ivar] = ev->GetValue( ivar );
708  for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) tgtVals[itgt] = ev->GetTarget( itgt );
709  for (UInt_t ivis = 0; ivis < ev->GetNSpectators(); ivis++) visVals[ivis] = ev->GetSpectator( ivis );
710 
711 
712  // loop through all the results and write the branches
713  n=0;
714  for (std::map<TString, Results*>::iterator itMethod = fResults.at(t).begin();
715  itMethod != fResults.at(t).end(); itMethod++) {
716  Results* results = itMethod->second;
717 
718  const std::vector< Float_t >& vals = results->operator[](iEvt);
719 
720  if (itMethod->second->GetAnalysisType() == Types::kClassification) {
721  // classification
722  metVals[n][0] = vals[0];
723  }
724  else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
725  // multiclass classification
726  for (UInt_t nCls = 0, nClsEnd=fdsi.GetNClasses(); nCls < nClsEnd; nCls++) {
727  Float_t val = vals.at(nCls);
728  metVals[n][nCls] = val;
729  }
730  }
731  else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
732  // regression
733  for (UInt_t nTgts = 0; nTgts < fdsi.GetNTargets(); nTgts++) {
734  Float_t val = vals.at(nTgts);
735  metVals[n][nTgts] = val;
736  }
737  }
738  n++;
739  }
740  // fill the variables into the tree
741  tree->Fill();
742  }
743 
744  Log() << kINFO << "Created tree '" << tree->GetName() << "' with " << tree->GetEntries() << " events" << Endl;
745 
746  SetCurrentType(savedType);
747 
748  delete[] varVals;
749  delete[] tgtVals;
750  delete[] visVals;
751 
752  for(UInt_t i=0; i<fResults.at(t).size(); i++ )
753  delete[] metVals[i];
754  delete[] metVals;
755 
756  delete[] className;
757 
758  return tree;
759 }
760 
UInt_t GetNSpectators() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:216
void SetEventCollection(std::vector< Event * > *, Types::ETreeType)
Sets the event collection (by DataSetFactory)
Definition: DataSet.cxx:235
Random number generator class based on M.
Definition: TRandom3.h:29
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
long long Long64_t
Definition: RtypesCore.h:69
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Definition: DataSet.cxx:225
float Float_t
Definition: RtypesCore.h:53
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4306
TRandom3 * fSamplingRandom
Definition: DataSet.h:176
EAnalysisType
Definition: Types.h:124
void SetTreeType(Types::ETreeType type)
Definition: Results.h:72
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< Char_t > fBlockBelongToTraining
Definition: DataSet.h:187
const Bool_t kFALSE
Definition: Rtypes.h:92
void ClearNClassEvents(Int_t type)
Definition: DataSet.cxx:144
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:376
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Definition: Event.cxx:256
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:420
const char * Data() const
Definition: TString.h:349
virtual ~DataSet()
destructor
Definition: DataSet.cxx:100
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:579
TString & Append(const char *cs)
Definition: TString.h:492
std::vector< std::vector< double > > Data
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:303
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
Definition: DataSet.cxx:384
void ApplyTrainingSetDivision()
apply division of data set
Definition: DataSet.cxx:364
UInt_t GetNSpectators() const
accessor to the number of spectators
Definition: Event.cxx:322
TThread * t[5]
Definition: threadsh1.C:13
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
Definition: DataSet.cxx:263
std::vector< std::vector< Long64_t > > fClassEvents
Definition: DataSet.h:180
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:396
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particulary Method instance (here appareantly called resultsName i...
Definition: DataSet.cxx:314
unsigned int UInt_t
Definition: RtypesCore.h:42
void DivideTrainingSet(UInt_t blockNum)
divide training set
Definition: DataSet.cxx:340
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
Definition: DataSet.cxx:173
const TString & GetName() const
Definition: ClassInfo.h:72
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:404
void CreateSampling() const
create an event sampling (random or importance sampling)
Definition: DataSet.cxx:478
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:135
const Event * GetEvent() const
Definition: DataSet.cxx:186
std::vector< Char_t > fSampling
Definition: DataSet.h:171
tuple tree
Definition: tree.py:24
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
Definition: DataSet.cxx:542
std::vector< Float_t > fSamplingWeight
Definition: DataSet.h:173
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:412
int type
Definition: TGX11.cxx:120
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:231
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:314
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:208
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:200
std::vector< Int_t > fSamplingNEvents
Definition: DataSet.h:172
UInt_t GetClass() const
Definition: Event.h:86
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:152
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:101
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1623
#define dest(otri, vertexptr)
Definition: triangle.c:1040
#define NULL
Definition: Rtypes.h:82
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
Definition: TString.cxx:1284
TString()
TString default ctor.
Definition: TString.cxx:87
virtual Long64_t GetEntries() const
Definition: TTree.h:386
A TTree object has a header with a name and a title.
Definition: TTree.h:98
const Bool_t kTRUE
Definition: Rtypes.h:91
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
Definition: DataSet.cxx:428
const Int_t n
Definition: legend1.C:16
Definition: math.cpp:60