Logo ROOT  
Reference Guide
DataSet.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : DataSet *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17 * *
18 * Copyright (c) 2006: *
19 * CERN, Switzerland *
20 * MPI-K Heidelberg, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27/*! \class TMVA::DataSet
28\ingroup TMVA
29
30Class that contains all the data information
31
32*/
33
34#include <vector>
35#include <algorithm>
36#include <cstdlib>
37#include <stdexcept>
38
39#include "TMVA/DataSetInfo.h"
40#include "TMVA/DataSet.h"
41#include "TMVA/Event.h"
42#include "TMVA/MsgLogger.h"
46#include "TMVA/Configurable.h"
47
48#include "TMVA/Types.h"
49#include "TMVA/Results.h"
50#include "TMVA/VariableInfo.h"
51
52#include "TRandom3.h"
53#include "strlcpy.h"
54
55////////////////////////////////////////////////////////////////////////////////
56/// constructor
57
59 :TNamed(dsi.GetName(),"DataSet"),
60 fdsi(&dsi),
61 fEventCollection(4),
62 fCurrentTreeIdx(0),
63 fCurrentEventIdx(0),
64 fHasNegativeEventWeights(kFALSE),
65 fLogger( new MsgLogger(TString(TString("Dataset:")+dsi.GetName()).Data()) ),
66 fTrainingBlockSize(0)
67{
68
69 fClassEvents.resize(4);
70 fBlockBelongToTraining.reserve(10);
72
73 // sampling
75
76 Int_t treeNum = 2;
77 fSampling.resize( treeNum );
78 fSamplingNEvents.resize( treeNum );
79 fSamplingWeight.resize(treeNum);
80
81 for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
82 fSampling.at(treeIdx) = kFALSE;
83 fSamplingNEvents.at(treeIdx) = 0;
84 fSamplingWeight.at(treeIdx) = 1.0;
85 }
86}
87
88////////////////////////////////////////////////////////////////////////////////
89/// constructor
90
92 :fdsi(new DataSetInfo(GetName())),
93 fEventCollection(4),
94 fCurrentTreeIdx(0),
95 fCurrentEventIdx(0),
96 fHasNegativeEventWeights(kFALSE),
97 fLogger( new MsgLogger(TString(TString("Dataset:")+GetName()).Data()) ),
98 fTrainingBlockSize(0)
99{
100
101 fClassEvents.resize(4);
102 fBlockBelongToTraining.reserve(10);
103 fBlockBelongToTraining.push_back(kTRUE);
104
105 // sampling
106 fSamplingRandom = 0;
107
108 Int_t treeNum = 2;
109 fSampling.resize( treeNum );
110 fSamplingNEvents.resize( treeNum );
111 fSamplingWeight.resize(treeNum);
112
113 for (Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
114 fSampling.at(treeIdx) = kFALSE;
115 fSamplingNEvents.at(treeIdx) = 0;
116 fSamplingWeight.at(treeIdx) = 1.0;
117 }
118}
119
120////////////////////////////////////////////////////////////////////////////////
121/// destructor
122
124{
125 // delete event collection
126 Bool_t deleteEvents=true; // dataset owns the events /JS
127 DestroyCollection( Types::kTraining, deleteEvents );
128 DestroyCollection( Types::kTesting, deleteEvents );
129
130 fBlockBelongToTraining.clear();
131 // delete results
132 for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); ++it) {
133 for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); ++itMap) {
134 delete itMap->second;
135 }
136 }
137
138 // delete sampling
139 if (fSamplingRandom != 0 ) delete fSamplingRandom;
140
141
142 // need also to delete fEventCollections[2] and [3], not sure if they are used
143 DestroyCollection( Types::kValidation, deleteEvents );
144 DestroyCollection( Types::kTrainingOriginal, deleteEvents );
145
146 delete fLogger;
147}
148
149////////////////////////////////////////////////////////////////////////////////
150
152{
153 if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
154 if (fClassEvents.at( type ).size() < classNumber+1) fClassEvents.at( type ).resize( classNumber+1 );
155 fClassEvents.at( type ).at( classNumber ) += 1;
156}
157
158////////////////////////////////////////////////////////////////////////////////
159
161{
162 if (fClassEvents.size()<(UInt_t)(type+1)) fClassEvents.resize( type+1 );
163 fClassEvents.at( type ).clear();
164}
165
166////////////////////////////////////////////////////////////////////////////////
167
169{
170 try {
171 return fClassEvents.at(type).at(classNumber);
172 }
173 catch (std::out_of_range &) {
174 ClassInfo* ci = fdsi->GetClassInfo( classNumber );
175 Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "No " << (type==0?"training":(type==1?"testing":"_unknown_type_"))
176 << " events for class " << (ci==NULL?"_no_name_known_":ci->GetName()) << " (index # "<<classNumber<<")"
177 << " available. Check if all class names are spelled correctly and if events are"
178 << " passing the selection cuts." << Endl;
179 }
180 catch (...) {
181 Log() << kFATAL << Form("Dataset[%s] : ",fdsi->GetName()) << "ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" << Endl;
182 }
183 return 0;
184}
185
186////////////////////////////////////////////////////////////////////////////////
187/// destroys the event collection (events + vector)
188
190{
191 UInt_t i = TreeIndex(type);
192 if (i>=fEventCollection.size() || fEventCollection[i].size()==0) return;
193 if (deleteEvents) {
194
195 for (UInt_t j=0; j<fEventCollection[i].size(); j++) delete fEventCollection[i][j];
196 }
197 fEventCollection[i].clear();
198}
199
200////////////////////////////////////////////////////////////////////////////////
201
203{
204 if (fSampling.size() > UInt_t(fCurrentTreeIdx) && fSampling.at(fCurrentTreeIdx)) {
205 Long64_t iEvt = fSamplingSelected.at(fCurrentTreeIdx).at( fCurrentEventIdx ).second;
206 return ((fEventCollection.at(fCurrentTreeIdx))).at(iEvt);
207 }
208 else {
209 return ((fEventCollection.at(fCurrentTreeIdx))).at(fCurrentEventIdx);
210 }
211}
212
213////////////////////////////////////////////////////////////////////////////////
214/// access the number of variables through the datasetinfo
215
217{
218 return fdsi->GetNVariables();
219}
220
221////////////////////////////////////////////////////////////////////////////////
222/// access the number of targets through the datasetinfo
223
225{
226 return fdsi->GetNTargets();
227}
228
229////////////////////////////////////////////////////////////////////////////////
230/// access the number of targets through the datasetinfo
231
233{
234 return fdsi->GetNSpectators();
235}
236
237////////////////////////////////////////////////////////////////////////////////
238/// add event to event list
239/// after which the event is owned by the dataset
240
242{
243 fEventCollection.at(Int_t(type)).push_back(ev);
244 if (ev->GetWeight()<0) fHasNegativeEventWeights = kTRUE;
245}
246
247////////////////////////////////////////////////////////////////////////////////
248/// Sets the event collection (by DataSetFactory)
249
250void TMVA::DataSet::SetEventCollection(std::vector<TMVA::Event*>* events, Types::ETreeType type, Bool_t deleteEvents)
251{
252 DestroyCollection(type,deleteEvents);
253
254 const Int_t t = TreeIndex(type);
255 ClearNClassEvents( type );
256 //pointer to std::vector is not serializable,
257 fEventCollection.at(t) = *events;
258 for (std::vector<Event*>::iterator it = fEventCollection.at(t).begin(); it < fEventCollection.at(t).end(); ++it) {
259 IncrementNClassEvents( t, (*it)->GetClass() );
260 }
261}
262
263////////////////////////////////////////////////////////////////////////////////
264
267 Types::EAnalysisType analysistype )
268{
269 UInt_t t = TreeIndex(type);
270 if (t<fResults.size()) {
271 const std::map< TString, Results* >& resultsForType = fResults[t];
272 std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
273 if (it!=resultsForType.end()) {
274 //Log() << kINFO << " GetResults("<<info<<") returns existing result." << Endl;
275 return it->second;
276 }
277 }
278 else {
279 fResults.resize(t+1);
280 }
281
282 // nothing found
283
284 Results * newresults = 0;
285 switch(analysistype) {
287 newresults = new ResultsClassification(fdsi,resultsName);
288 break;
290 newresults = new ResultsRegression(fdsi,resultsName);
291 break;
293 newresults = new ResultsMulticlass(fdsi,resultsName);
294 break;
296 newresults = new ResultsClassification(fdsi,resultsName);
297 break;
299 //Log() << kINFO << " GetResults("<<info<<") can't create new one." << Endl;
300 return 0;
301 break;
302 }
303
304 newresults->SetTreeType( type );
305 fResults[t][resultsName] = newresults;
306
307 //Log() << kINFO << " GetResults("<<info<<") builds new result." << Endl;
308 return newresults;
309}
310////////////////////////////////////////////////////////////////////////////////
311/// delete the results stored for this particular Method instance.
312/// (here apparently called resultsName instead of MethodTitle
313/// Tree type (Training, testing etc..)
314/// Analysis Type (Classification, Multiclass, Regression etc..)
315
316void TMVA::DataSet::DeleteResults( const TString & resultsName,
318 Types::EAnalysisType /* analysistype */ )
319{
320 if (fResults.empty()) return;
321
322 if (UInt_t(type) > fResults.size()){
323 Log()<<kFATAL<< Form("Dataset[%s] : ",fdsi->GetName()) << "you asked for an Treetype (training/testing/...)"
324 << " whose index " << type << " does not exist " << Endl;
325 }
326 std::map< TString, Results* >& resultsForType = fResults[UInt_t(type)];
327 std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
328 if (it!=resultsForType.end()) {
329 Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << " Delete Results previous existing result:" << resultsName
330 << " of type " << type << Endl;
331 delete it->second;
332 resultsForType.erase(it->first);
333 }
334 else {
335 Log() << kINFO << Form("Dataset[%s] : ",fdsi->GetName()) << "could not fine Result class of " << resultsName
336 << " of type " << type << " which I should have deleted" << Endl;
337 }
338}
339
340////////////////////////////////////////////////////////////////////////////////
341/// Deletes all results currently in the dataset.
342///
344 Types::EAnalysisType /* analysistype */ )
345{
346 if (fResults.empty()) return;
347
348 if (UInt_t(type) > fResults.size()){
349 Log()<<kFATAL<< Form("Dataset[%s] : ",fdsi->GetName()) << "you asked for an Treetype (training/testing/...)"
350 << " whose index " << type << " does not exist " << Endl;
351 }
352
353 std::map<TString, Results *> & resultsForType = fResults[UInt_t(type)];
354
355 for (auto && it : resultsForType) {
356 auto & resultsName = it.first;
357
358 Log() << kDEBUG << Form("Dataset[%s] : ", fdsi->GetName())
359 << " DeleteAllResults previous existing result: "
360 << resultsName << " of type " << type << Endl;
361
362 delete it.second;
363 }
364
365 resultsForType.clear();
366}
367
368////////////////////////////////////////////////////////////////////////////////
369/// divide training set
370
372{
373 Int_t tOrg = TreeIndex(Types::kTrainingOriginal),tTrn = TreeIndex(Types::kTraining);
374 // not changing anything ??
375 if (fBlockBelongToTraining.size() == blockNum) return;
376 // storing the original training vector
377 if (fBlockBelongToTraining.size() == 1) {
378 if (fEventCollection[tOrg].size() == 0)
379 fEventCollection[tOrg].resize(fEventCollection[tTrn].size());
380 fEventCollection[tOrg].clear();
381 for (UInt_t i=0; i<fEventCollection[tTrn].size(); i++)
382 fEventCollection[tOrg].push_back(fEventCollection[tTrn][i]);
383 fClassEvents[tOrg] = fClassEvents[tTrn];
384 }
385 //reseting the event division vector
386 fBlockBelongToTraining.clear();
387 for (UInt_t i=0 ; i < blockNum ; i++) fBlockBelongToTraining.push_back(kTRUE);
388
389 ApplyTrainingSetDivision();
390}
391
392////////////////////////////////////////////////////////////////////////////////
393/// apply division of data set
394
396{
397 Int_t tOrg = TreeIndex(Types::kTrainingOriginal), tTrn = TreeIndex(Types::kTraining), tVld = TreeIndex(Types::kValidation);
398 fEventCollection[tTrn].clear();
399 if (fEventCollection[tVld].size()==0)
400 fEventCollection[tVld].resize(fEventCollection[tOrg].size());
401 fEventCollection[tVld].clear();
402
403 //creating the new events collections, notice that the events that can't be evenly divided belong to the last event
404 for (UInt_t i=0; i<fEventCollection[tOrg].size(); i++) {
405 if (fBlockBelongToTraining[i % fBlockBelongToTraining.size()])
406 fEventCollection[tTrn].push_back(fEventCollection[tOrg][i]);
407 else
408 fEventCollection[tVld].push_back(fEventCollection[tOrg][i]);
409 }
410}
411
412////////////////////////////////////////////////////////////////////////////////
413/// move training block
414
416{
418 fBlockBelongToTraining[blockInd]=kFALSE;
419 else
420 fBlockBelongToTraining[blockInd]=kTRUE;
421 if (applyChanges) ApplyTrainingSetDivision();
422}
423
424////////////////////////////////////////////////////////////////////////////////
425/// return number of signal test events in dataset
426
428{
429 return GetNClassEvents(Types::kTesting, fdsi->GetClassInfo("Signal")->GetNumber() );
430}
431
432////////////////////////////////////////////////////////////////////////////////
433/// return number of background test events in dataset
434
436{
437 return GetNClassEvents(Types::kTesting, fdsi->GetClassInfo("Background")->GetNumber() );
438}
439
440////////////////////////////////////////////////////////////////////////////////
441/// return number of signal training events in dataset
442
444{
445 return GetNClassEvents(Types::kTraining, fdsi->GetClassInfo("Signal")->GetNumber() );
446}
447
448////////////////////////////////////////////////////////////////////////////////
449/// return number of background training events in dataset
450
452{
453 return GetNClassEvents(Types::kTraining, fdsi->GetClassInfo("Background")->GetNumber() );
454}
455
456////////////////////////////////////////////////////////////////////////////////
457/// initialize random or importance sampling
458
459void TMVA::DataSet::InitSampling( Float_t fraction, Float_t weight, UInt_t seed )
460{
461 // add a random generator if not yet present
462 if (fSamplingRandom == 0 ) fSamplingRandom = new TRandom3( seed );
463
464 // first, clear the lists
465 std::vector< std::pair< Float_t, Long64_t >* > evtList;
466
467 Int_t treeIdx = TreeIndex( GetCurrentType() );
468
469 if (fSamplingEventList.size() < UInt_t(treeIdx+1) ) fSamplingEventList.resize(treeIdx+1);
470 if (fSamplingSelected.size() < UInt_t(treeIdx+1) ) fSamplingSelected.resize(treeIdx+1);
471
472 fSamplingEventList.at(treeIdx).clear();
473 fSamplingSelected.at(treeIdx).clear();
474
475 if (fSampling.size() < UInt_t(treeIdx+1) ) fSampling.resize(treeIdx+1);
476 if (fSamplingNEvents.size() < UInt_t(treeIdx+1) ) fSamplingNEvents.resize(treeIdx+1);
477 if (fSamplingWeight.size() < UInt_t(treeIdx+1) ) fSamplingWeight.resize(treeIdx+1);
478
479 if (fraction > 0.999999 || fraction < 0.0000001) {
480 fSampling.at( treeIdx ) = false;
481 fSamplingNEvents.at( treeIdx ) = 0;
482 fSamplingWeight.at( treeIdx ) = 1.0;
483 return;
484 }
485
486 // for the initialization, the sampling has to be turned off, afterwards we will turn it on
487 fSampling.at( treeIdx ) = false;
488
489 fSamplingNEvents.at( treeIdx ) = Int_t(fraction*GetNEvents());
490 fSamplingWeight.at( treeIdx ) = weight;
491
492 Long64_t nEvts = GetNEvents();
493 fSamplingEventList.at( treeIdx ).reserve( nEvts );
494 fSamplingSelected.at( treeIdx ).reserve( fSamplingNEvents.at(treeIdx) );
495 for (Long64_t ievt=0; ievt<nEvts; ievt++) {
496 std::pair<Float_t,Long64_t> p(1.0,ievt);
497 fSamplingEventList.at( treeIdx ).push_back( p );
498 }
499
500 // now turn sampling on
501 fSampling.at( treeIdx ) = true;
502}
503
504
505////////////////////////////////////////////////////////////////////////////////
506/// create an event sampling (random or importance sampling)
507
509{
510 Int_t treeIdx = TreeIndex( GetCurrentType() );
511
512 if (!fSampling.at(treeIdx) ) return;
513
514 if (fSamplingRandom == 0 )
515 Log() << kFATAL<< Form("Dataset[%s] : ",fdsi->GetName())
516 << "no random generator present for creating a random/importance sampling (initialized?)" << Endl;
517
518 // delete the previous selection
519 fSamplingSelected.at(treeIdx).clear();
520
521 // create a temporary event-list
522 std::vector< std::pair< Float_t, Long64_t > > evtList;
523 std::vector< std::pair< Float_t, Long64_t > >::iterator evtListIt;
524
525 // some variables
526 Float_t sumWeights = 0;
527
528 // make a copy of the event-list
529 evtList.assign( fSamplingEventList.at(treeIdx).begin(), fSamplingEventList.at(treeIdx).end() );
530
531 // sum up all the weights (internal weights for importance sampling)
532 for (evtListIt = evtList.begin(); evtListIt != evtList.end(); ++evtListIt) {
533 sumWeights += (*evtListIt).first;
534 }
535 evtListIt = evtList.begin();
536
537 // random numbers
538 std::vector< Float_t > rnds;
539 rnds.reserve(fSamplingNEvents.at(treeIdx));
540
541 Float_t pos = 0;
542 for (Int_t i = 0; i < fSamplingNEvents.at(treeIdx); i++) {
543 pos = fSamplingRandom->Rndm()*sumWeights;
544 rnds.push_back( pos );
545 }
546
547 // sort the random numbers
548 std::sort(rnds.begin(),rnds.end());
549
550 // select the events according to the random numbers
551 std::vector< Float_t >::iterator rndsIt = rnds.begin();
552 Float_t runningSum = 0.000000001;
553 for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
554 runningSum += (*evtListIt).first;
555 if (runningSum >= (*rndsIt)) {
556 fSamplingSelected.at(treeIdx).push_back( (*evtListIt) );
557 evtListIt = evtList.erase( evtListIt );
558
559 ++rndsIt;
560 if (rndsIt == rnds.end() ) break;
561 }
562 else {
563 ++evtListIt;
564 }
565 }
566}
567
568////////////////////////////////////////////////////////////////////////////////
569/// increase the importance sampling weight of the event
570/// when not successful and decrease it when successful
571
572void TMVA::DataSet::EventResult( Bool_t successful, Long64_t evtNumber )
573{
574
575 if (!fSampling.at(fCurrentTreeIdx)) return;
576 if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999) return;
577
578 Long64_t start = 0;
579 Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
580 if (evtNumber >= 0) {
581 start = evtNumber;
582 stop = evtNumber;
583 }
584 for ( Long64_t iEvt = start; iEvt <= stop; iEvt++ ){
585 if (Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) < iEvt) {
586 Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "event number (" << iEvt
587 << ") larger than number of sampled events ("
588 << fSamplingEventList.at(fCurrentTreeIdx).size() << " of tree " << fCurrentTreeIdx << ")" << Endl;
589 return;
590 }
591 Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first;
592 if (!successful) {
593 // weight /= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
594 weight /= fSamplingWeight.at(fCurrentTreeIdx);
595 if (weight > 1.0 ) weight = 1.0;
596 }
597 else {
598 // weight *= (fSamplingWeight.at(fCurrentTreeIdx)/fSamplingEventList.at(fCurrentTreeIdx).size());
599 weight *= fSamplingWeight.at(fCurrentTreeIdx);
600 }
601 fSamplingEventList.at(fCurrentTreeIdx).at( iEvt ).first = weight;
602 }
603}
604
605////////////////////////////////////////////////////////////////////////////////
606/// create the test/trainings tree with all the variables, the weights, the
607/// classes, the targets, the spectators, the MVA outputs
608
610{
611 Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "GetTree(" << ( type==Types::kTraining ? "training" : "testing" ) << ")" << Endl;
612
613 // the dataset does not hold the tree, this function returns a new tree every time it is called
614
615 if (type!=Types::kTraining && type!=Types::kTesting) return 0;
616
617 Types::ETreeType savedType = GetCurrentType();
618
619 SetCurrentType(type);
620 const UInt_t t = TreeIndex(type);
621 if (fResults.size() <= t) {
622 Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "No results for treetype " << ( type==Types::kTraining ? "training" : "testing" )
623 << " found. Size=" << fResults.size() << Endl;
624 }
625
626 // return number of background training events in dataset
627 TString treeName( (type == Types::kTraining ? "TrainTree" : "TestTree" ) );
628 TTree *tree = new TTree(treeName,treeName);
629
630 Float_t *varVals = new Float_t[fdsi->GetNVariables()];
631 Float_t *tgtVals = new Float_t[fdsi->GetNTargets()];
632 Float_t *visVals = new Float_t[fdsi->GetNSpectators()];
633
634 UInt_t cls;
635 Float_t weight;
636 // TObjString *className = new TObjString();
637 char className[40];
638
639
640 //Float_t metVals[fResults.at(t).size()][Int_t(fdsi->GetNTargets()+1)];
641 // replace by: [Joerg]
642 Float_t **metVals = new Float_t*[fResults.at(t).size()];
643 for(UInt_t i=0; i<fResults.at(t).size(); i++ )
644 metVals[i] = new Float_t[fdsi->GetNTargets()+fdsi->GetNClasses()];
645
646 // create branches for event-variables
647 tree->Branch( "classID", &cls, "classID/I" );
648 tree->Branch( "className", className, "className/C" );
649
650 // create all branches for the variables
651 Int_t n = 0;
652 Int_t ivar_array = 0;
653 Int_t arraySize = -1;
654 for (std::vector<VariableInfo>::const_iterator itVars = fdsi->GetVariableInfos().begin();
655 itVars != fdsi->GetVariableInfos().end(); ++itVars) {
656
657 // has to be changed to take care of types different than float: TODO
658 if (!itVars->TestBit(DataSetInfo::kIsArrayVariable) )
659 tree->Branch( (*itVars).GetInternalName(), &varVals[n], (*itVars).GetInternalName()+TString("/F") );
660 else {
661 // variable is an array
662 if (ivar_array == 0) {
663 TString name = (*itVars).GetInternalName();
664 name.ReplaceAll("[0]", "");
665 arraySize = fdsi->GetVarArraySize((*itVars).GetExpression());
666 tree->Branch(name, &varVals[n], name + TString::Format("[%d]/F", arraySize));
667 Log() << kDEBUG << "creating branch for array " << name << " with size " << arraySize << Endl;
668 }
669 ivar_array++;
670 if (ivar_array == arraySize)
671 ivar_array = 0;
672 }
673 n++;
674 }
675 // create the branches for the targets
676 n = 0;
677 for (std::vector<VariableInfo>::const_iterator itTgts = fdsi->GetTargetInfos().begin();
678 itTgts != fdsi->GetTargetInfos().end(); ++itTgts) {
679 // has to be changed to take care of types different than float: TODO
680 tree->Branch( (*itTgts).GetInternalName(), &tgtVals[n], (*itTgts).GetInternalName()+TString("/F") );
681 n++;
682 }
683 // create the branches for the spectator variables
684 n = 0;
685 for (std::vector<VariableInfo>::const_iterator itVis = fdsi->GetSpectatorInfos().begin();
686 itVis != fdsi->GetSpectatorInfos().end(); ++itVis) {
687 // has to be changed to take care of types different than float: TODO
688 tree->Branch( (*itVis).GetInternalName(), &visVals[n], (*itVis).GetInternalName()+TString("/F") );
689 n++;
690 }
691
692 tree->Branch( "weight", &weight, "weight/F" );
693
694 // create all the branches for the results
695 n = 0;
696 for (std::map< TString, Results* >::iterator itMethod = fResults.at(t).begin();
697 itMethod != fResults.at(t).end(); ++itMethod) {
698
699
700 Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "analysis type: " << (itMethod->second->GetAnalysisType()==Types::kRegression ? "Regression" :
701 (itMethod->second->GetAnalysisType()==Types::kMulticlass ? "Multiclass" : "Classification" )) << Endl;
702
703 if (itMethod->second->GetAnalysisType() == Types::kClassification) {
704 // classification
705 tree->Branch( itMethod->first, &(metVals[n][0]), itMethod->first + "/F" );
706 }
707 else if (itMethod->second->GetAnalysisType() == Types::kMulticlass) {
708 // multiclass classification
709 TString leafList("");
710 for (UInt_t iCls = 0; iCls < fdsi->GetNClasses(); iCls++) {
711 if (iCls > 0) leafList.Append( ":" );
712 leafList.Append( fdsi->GetClassInfo( iCls )->GetName() );
713 leafList.Append( "/F" );
714 }
715 Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
716 << leafList << " itMethod->second " << itMethod->second << Endl;
717 tree->Branch( itMethod->first, (metVals[n]), leafList );
718 }
719 else if (itMethod->second->GetAnalysisType() == Types::kRegression) {
720 // regression
721 TString leafList("");
722 for (UInt_t iTgt = 0; iTgt < fdsi->GetNTargets(); iTgt++) {
723 if (iTgt > 0) leafList.Append( ":" );
724 leafList.Append( fdsi->GetTargetInfo( iTgt ).GetInternalName() );
725 // leafList.Append( fdsi->GetTargetInfo( iTgt ).GetLabel() );
726 leafList.Append( "/F" );
727 }
728 Log() << kDEBUG << Form("Dataset[%s] : ",fdsi->GetName()) << "itMethod->first " << itMethod->first << " LEAFLIST: "
729 << leafList << " itMethod->second " << itMethod->second << Endl;
730 tree->Branch( itMethod->first, (metVals[n]), leafList );
731 }
732 else {
733 Log() << kWARNING << Form("Dataset[%s] : ",fdsi->GetName()) << "Unknown analysis type for result found when writing TestTree." << Endl;
734 }
735 n++;
736
737 }
738
739 // Sanity check, ensure all result sets have the expected number of events
740 for (auto && itMethod : fResults.at(t)) {
741 auto numEvents = GetNEvents(type);
742 auto results = itMethod.second;
743 auto resultsName = itMethod.first;
744
745 Long64_t numEventsResults = 0;
746 auto analysisType = results->GetAnalysisType();
747 if (analysisType == Types::kClassification) {
748 numEventsResults = dynamic_cast<ResultsClassification *>(results)->GetSize();
749 } else if (analysisType == Types::kMulticlass) {
750 numEventsResults = dynamic_cast<ResultsMulticlass *>(results)->GetSize();
751 } else if (analysisType == Types::kRegression) {
752 numEventsResults = dynamic_cast<ResultsRegression *>(results)->GetSize();
753 } else {
754 Log() << kFATAL << "Unexpected analysisType." << Endl;
755 }
756
757 if (numEventsResults != numEvents) {
758 Log() << kFATAL << "An error occurred in DataSet::GetTree. "
759 "Inconsistent size of result for result with name '"
760 << resultsName << "'."
761 << " Size is '" << std::to_string(numEventsResults)
762 << "'.'"
763 << " Expected '" << numEvents << "'." << Endl;
764 }
765 }
766
767 // loop through all the events
768 for (Long64_t iEvt = 0; iEvt < GetNEvents( type ); iEvt++) {
769 // write the event-variables
770 const Event* ev = GetEvent( iEvt );
771 // write the classnumber and the classname
772 cls = ev->GetClass();
773 weight = ev->GetWeight();
774 strlcpy(className, fdsi->GetClassInfo( cls )->GetName(), sizeof(className));
775
776 // write the variables, targets and spectator variables
777 for (UInt_t ivar = 0; ivar < ev->GetNVariables(); ivar++) varVals[ivar] = ev->GetValue( ivar );
778 for (UInt_t itgt = 0; itgt < ev->GetNTargets(); itgt++) tgtVals[itgt] = ev->GetTarget( itgt );
779 for (UInt_t ivis = 0; ivis < ev->GetNSpectators(); ivis++) visVals[ivis] = ev->GetSpectator( ivis );
780
781
782 // loop through all the results and write the branches
783 auto iMethod = 0;
784 for (auto && itMethod : fResults.at(t)) {
785 auto & results = *itMethod.second;
786 auto analysisType = results.GetAnalysisType();
787
788 auto const & vals = results[iEvt];
789
790 if (analysisType == Types::kClassification) {
791 metVals[iMethod][0] = vals[0];
792 } else if (analysisType == Types::kMulticlass) {
793 for (UInt_t nCls = 0; nCls < fdsi->GetNClasses(); nCls++) {
794 Float_t val = vals.at(nCls);
795 metVals[iMethod][nCls] = val;
796 }
797 } else if (analysisType == Types::kRegression) {
798 for (UInt_t nTgts = 0; nTgts < fdsi->GetNTargets(); nTgts++) {
799 Float_t val = vals.at(nTgts);
800 metVals[iMethod][nTgts] = val;
801 }
802 }
803 ++iMethod;
804 }
805 // fill the variables into the tree
806 tree->Fill();
807 }
808
809 Log() << kHEADER //<< Form("[%s] : ",fdsi.GetName())
810 << "Created tree '" << tree->GetName() << "' with " << tree->GetEntries() << " events" << Endl << Endl;
811
812 SetCurrentType(savedType);
813
814 delete[] varVals;
815 delete[] tgtVals;
816 delete[] visVals;
817
818 for(UInt_t i=0; i<fResults.at(t).size(); i++ )
819 delete[] metVals[i];
820 delete[] metVals;
821
822 return tree;
823}
824
int Int_t
Definition: CPyCppyy.h:43
unsigned int UInt_t
Definition: CPyCppyy.h:44
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
const Bool_t kFALSE
Definition: RtypesCore.h:101
bool Bool_t
Definition: RtypesCore.h:63
float Float_t
Definition: RtypesCore.h:57
const Bool_t kTRUE
Definition: RtypesCore.h:100
int type
Definition: TGX11.cxx:121
char * Form(const char *fmt,...)
Class that contains all the information of a class.
Definition: ClassInfo.h:49
Class that contains all the data information.
Definition: DataSetInfo.h:62
void DivideTrainingSet(UInt_t blockNum)
divide training set
Definition: DataSet.cxx:371
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Definition: DataSet.cxx:241
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:427
std::vector< Char_t > fSampling
Definition: DataSet.h:149
std::vector< Float_t > fSamplingWeight
Definition: DataSet.h:151
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:224
void ClearNClassEvents(Int_t type)
Definition: DataSet.cxx:160
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:443
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
Definition: DataSet.cxx:572
void SetEventCollection(std::vector< Event * > *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
Definition: DataSet.cxx:250
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets,...
Definition: DataSet.cxx:609
const Event * GetEvent() const
Definition: DataSet.cxx:202
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:168
std::vector< Char_t > fBlockBelongToTraining
Definition: DataSet.h:165
UInt_t GetNSpectators() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:232
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
Definition: DataSet.cxx:415
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:216
std::vector< Int_t > fSamplingNEvents
Definition: DataSet.h:150
DataSet()
constructor
Definition: DataSet.cxx:91
virtual ~DataSet()
destructor
Definition: DataSet.cxx:123
std::vector< std::vector< Long64_t > > fClassEvents
Definition: DataSet.h:158
void DeleteAllResults(Types::ETreeType type, Types::EAnalysisType analysistype)
Deletes all results currently in the dataset.
Definition: DataSet.cxx:343
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
Definition: DataSet.cxx:459
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:151
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
Definition: DataSet.cxx:316
void CreateSampling() const
create an event sampling (random or importance sampling)
Definition: DataSet.cxx:508
TRandom3 * fSamplingRandom
Definition: DataSet.h:154
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:451
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
Definition: DataSet.cxx:189
void ApplyTrainingSetDivision()
apply division of data set
Definition: DataSet.cxx:395
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:435
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:236
UInt_t GetNSpectators() const
accessor to the number of spectators
Definition: Event.cxx:327
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:308
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:319
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition: Event.cxx:381
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Definition: Event.cxx:261
UInt_t GetClass() const
Definition: Event.h:86
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Class that is the base-class for a vector of result.
Definition: Results.h:57
void SetTreeType(Types::ETreeType type)
Definition: Results.h:67
EAnalysisType
Definition: Types.h:128
@ kMulticlass
Definition: Types.h:131
@ kNoAnalysisType
Definition: Types.h:132
@ kClassification
Definition: Types.h:129
@ kMaxAnalysisType
Definition: Types.h:133
@ kRegression
Definition: Types.h:130
@ kTrainingOriginal
Definition: Types.h:149
@ kTraining
Definition: Types.h:145
@ kValidation
Definition: Types.h:148
@ kTesting
Definition: Types.h:146
@ kDEBUG
Definition: Types.h:58
@ kHEADER
Definition: Types.h:65
@ kINFO
Definition: Types.h:60
@ kWARNING
Definition: Types.h:61
@ kFATAL
Definition: Types.h:63
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:29
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Random number generator class based on M.
Definition: TRandom3.h:27
Basic string class.
Definition: TString.h:136
TString & Append(const char *cs)
Definition: TString.h:564
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2336
A TTree represents a columnar dataset.
Definition: TTree.h:79
long long Long64_t
Definition: cpp_cppyy.h:13
const Int_t n
Definition: legend1.C:16
static const std::string name("name")
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:760
Definition: tree.py:1
#define dest(otri, vertexptr)
Definition: triangle.c:1041