33 #ifndef ROOT_TMVA_DataSetInfo
36 #ifndef ROOT_TMVA_DataSet
39 #ifndef ROOT_TMVA_Event
42 #ifndef ROOT_TMVA_MsgLogger
45 #ifndef ROOT_TMVA_ResultsRegression
48 #ifndef ROOT_TMVA_ResultsClassification
51 #ifndef ROOT_TMVA_ResultsMulticlass
54 #ifndef ROOT_TMVA_Configurable
69 fEventCollection(4,(std::vector<
Event*>*)0),
72 fHasNegativeEventWeights(
kFALSE),
76 for (
UInt_t i=0; i<4; i++) fEventCollection[i] = new std::vector<Event*>;
90 for (
Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
107 fBlockBelongToTraining.clear();
109 for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); it++) {
110 for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); itMap++) {
111 delete itMap->second;
116 if (fSamplingRandom != 0 )
delete fSamplingRandom;
118 std::vector< std::pair< Float_t, Long64_t >* >::iterator itEv;
119 std::vector< std::vector<std::pair< Float_t, Long64_t >* > >::iterator treeIt;
120 for (treeIt = fSamplingEventList.begin(); treeIt != fSamplingEventList.end(); treeIt++ ) {
121 for (itEv = (*treeIt).begin(); itEv != (*treeIt).end(); itEv++) {
137 if (fClassEvents.size()<(
UInt_t)(type+1)) fClassEvents.resize( type+1 );
138 if (fClassEvents.at( type ).size() < classNumber+1) fClassEvents.at( type ).resize( classNumber+1 );
139 fClassEvents.at( type ).at( classNumber ) += 1;
146 if (fClassEvents.size()<(
UInt_t)(type+1)) fClassEvents.resize( type+1 );
147 fClassEvents.at( type ).clear();
155 return fClassEvents.at(type).at(classNumber);
157 catch (std::out_of_range excpt) {
158 ClassInfo* ci = fdsi.GetClassInfo( classNumber );
159 Log() <<
kFATAL <<
"No " << (type==0?
"training":(type==1?
"testing":
"_unknown_type_"))
160 <<
" events for class " << (ci==
NULL?
"_no_name_known_":ci->
GetName().
Data()) <<
" (index # "<<classNumber<<
")"
161 <<
" available. Check if all class names are spelled correctly and if events are"
162 <<
" passing the selection cuts." <<
Endl;
165 Log() <<
kFATAL <<
"ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" <<
Endl;
175 UInt_t i = TreeIndex(type);
176 if (i>=fEventCollection.size() || fEventCollection[i]==0)
return;
178 for (
UInt_t j=0; j<fEventCollection[i]->size(); j++)
delete (*fEventCollection[i])[j];
180 delete fEventCollection[i];
181 fEventCollection[i]=0;
188 if (fSampling.size() >
UInt_t(fCurrentTreeIdx) && fSampling.at(fCurrentTreeIdx)) {
189 Long64_t iEvt = fSamplingSelected.at(fCurrentTreeIdx).at( fCurrentEventIdx )->second;
190 return (*(fEventCollection.at(fCurrentTreeIdx))).at(iEvt);
193 return (*(fEventCollection.at(fCurrentTreeIdx))).at(fCurrentEventIdx);
202 return fdsi.GetNVariables();
210 return fdsi.GetNTargets();
218 return fdsi.GetNSpectators();
227 fEventCollection.at(
Int_t(type))->push_back(ev);
229 fEvtCollIt=fEventCollection.at(fCurrentTreeIdx)->begin();
237 Bool_t deleteEvents =
true;
238 DestroyCollection(type,deleteEvents);
240 const Int_t t = TreeIndex(type);
241 ClearNClassEvents( type );
242 fEventCollection.at(t) = events;
243 for (std::vector<Event*>::iterator it = fEventCollection.at(t)->begin(); it < fEventCollection.at(t)->end(); it++) {
244 IncrementNClassEvents( t, (*it)->GetClass() );
246 fEvtCollIt=fEventCollection.at(fCurrentTreeIdx)->begin();
268 if (t<fResults.size()) {
269 const std::map< TString, Results* >& resultsForType = fResults[
t];
270 std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
271 if (it!=resultsForType.end()) {
277 fResults.resize(t+1);
283 switch(analysistype) {
303 fResults[
t][resultsName] = newresults;
318 if (fResults.empty())
return;
320 if (
UInt_t(type) > fResults.size()){
321 Log()<<
kFATAL<<
"you asked for an Treetype (training/testing/...)"
322 <<
" whose index " << type <<
" does not exist " <<
Endl;
324 std::map< TString, Results* >& resultsForType = fResults[
UInt_t(type)];
325 std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
326 if (it!=resultsForType.end()) {
327 Log() <<
kDEBUG <<
" Delete Results previous existing result:" << resultsName
328 <<
" of type " << type <<
Endl;
330 resultsForType.erase(it->first);
333 Log() <<
kINFO <<
"could not fine Result class of " << resultsName
334 <<
" of type " << type <<
" which I should have deleted" <<
Endl;
344 if (fBlockBelongToTraining.size() == blockNum)
return;
346 if (fBlockBelongToTraining.size() == 1) {
347 if (fEventCollection[tOrg] == 0)
348 fEventCollection[tOrg]=
new std::vector<TMVA::Event*>(fEventCollection[tTrn]->size());
349 fEventCollection[tOrg]->clear();
350 for (
UInt_t i=0; i<fEventCollection[tTrn]->size(); i++)
351 fEventCollection[tOrg]->push_back((*fEventCollection[tTrn])[i]);
352 fClassEvents[tOrg] = fClassEvents[tTrn];
355 fBlockBelongToTraining.clear();
356 for (
UInt_t i=0 ; i < blockNum ; i++) fBlockBelongToTraining.push_back(
kTRUE);
358 ApplyTrainingSetDivision();
367 fEventCollection[tTrn]->clear();
368 if (fEventCollection[tVld]==0)
369 fEventCollection[tVld] =
new std::vector<TMVA::Event*>(fEventCollection[tOrg]->size());
370 fEventCollection[tVld]->clear();
373 for (
UInt_t i=0; i<fEventCollection[tOrg]->size(); i++) {
374 if (fBlockBelongToTraining[i % fBlockBelongToTraining.size()])
375 fEventCollection[tTrn]->push_back((*fEventCollection[tOrg])[i]);
377 fEventCollection[tVld]->push_back((*fEventCollection[tOrg])[i]);
387 fBlockBelongToTraining[blockInd]=
kFALSE;
389 fBlockBelongToTraining[blockInd]=
kTRUE;
390 if (applyChanges) ApplyTrainingSetDivision();
398 return GetNClassEvents(
Types::kTesting, fdsi.GetClassInfo(
"Signal")->GetNumber() );
406 return GetNClassEvents(
Types::kTesting, fdsi.GetClassInfo(
"Background")->GetNumber() );
414 return GetNClassEvents(
Types::kTraining, fdsi.GetClassInfo(
"Signal")->GetNumber() );
422 return GetNClassEvents(
Types::kTraining, fdsi.GetClassInfo(
"Background")->GetNumber() );
431 if (fSamplingRandom == 0 ) fSamplingRandom =
new TRandom3( seed );
434 std::vector< std::pair< Float_t, Long64_t >* > evtList;
435 std::vector< std::pair< Float_t, Long64_t >* >::iterator it;
437 Int_t treeIdx = TreeIndex( GetCurrentType() );
439 if (fSamplingEventList.size() <
UInt_t(treeIdx+1) ) fSamplingEventList.resize(treeIdx+1);
440 if (fSamplingSelected.size() <
UInt_t(treeIdx+1) ) fSamplingSelected.resize(treeIdx+1);
441 for (it = fSamplingEventList.at(treeIdx).begin(); it != fSamplingEventList.at(treeIdx).end(); it++ )
delete (*it);
442 fSamplingEventList.at(treeIdx).clear();
443 fSamplingSelected.at(treeIdx).clear();
445 if (fSampling.size() <
UInt_t(treeIdx+1) ) fSampling.resize(treeIdx+1);
446 if (fSamplingNEvents.size() <
UInt_t(treeIdx+1) ) fSamplingNEvents.resize(treeIdx+1);
447 if (fSamplingWeight.size() <
UInt_t(treeIdx+1) ) fSamplingWeight.resize(treeIdx+1);
449 if (fraction > 0.999999 || fraction < 0.0000001) {
450 fSampling.at( treeIdx ) =
false;
451 fSamplingNEvents.at( treeIdx ) = 0;
452 fSamplingWeight.at( treeIdx ) = 1.0;
457 fSampling.at( treeIdx ) =
false;
459 fSamplingNEvents.at( treeIdx ) =
Int_t(fraction*GetNEvents());
460 fSamplingWeight.at( treeIdx ) = weight;
463 fSamplingEventList.at( treeIdx ).reserve( nEvts );
464 fSamplingSelected.at( treeIdx ).reserve( fSamplingNEvents.at(treeIdx) );
465 for (
Long64_t ievt=0; ievt<nEvts; ievt++) {
466 std::pair<Float_t,Long64_t> *p =
new std::pair<Float_t,Long64_t>(1.0,ievt);
467 fSamplingEventList.at( treeIdx ).push_back( p );
471 fSampling.at( treeIdx ) =
true;
480 Int_t treeIdx = TreeIndex( GetCurrentType() );
482 if (!fSampling.at(treeIdx) )
return;
484 if (fSamplingRandom == 0 )
486 <<
"no random generator present for creating a random/importance sampling (initialized?)" <<
Endl;
489 fSamplingSelected.at(treeIdx).clear();
492 std::vector< std::pair< Float_t, Long64_t >* > evtList;
493 std::vector< std::pair< Float_t, Long64_t >* >::iterator evtListIt;
499 evtList.assign( fSamplingEventList.at(treeIdx).begin(), fSamplingEventList.at(treeIdx).end() );
502 for (evtListIt = evtList.begin(); evtListIt != evtList.end(); evtListIt++) {
503 sumWeights += (*evtListIt)->first;
505 evtListIt = evtList.begin();
508 std::vector< Float_t > rnds;
509 rnds.reserve(fSamplingNEvents.at(treeIdx));
512 for (
Int_t i = 0; i < fSamplingNEvents.at(treeIdx); i++) {
513 pos = fSamplingRandom->Rndm()*sumWeights;
514 rnds.push_back( pos );
518 std::sort(rnds.begin(),rnds.end());
521 std::vector< Float_t >::iterator rndsIt = rnds.begin();
522 Float_t runningSum = 0.000000001;
523 for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
524 runningSum += (*evtListIt)->first;
525 if (runningSum >= (*rndsIt)) {
526 fSamplingSelected.at(treeIdx).push_back( (*evtListIt) );
527 evtListIt = evtList.erase( evtListIt );
530 if (rndsIt == rnds.end() )
break;
545 if (!fSampling.at(fCurrentTreeIdx))
return;
546 if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999)
return;
549 Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
550 if (evtNumber >= 0) {
554 for (
Long64_t iEvt = start; iEvt <= stop; iEvt++ ){
555 if (
Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) < iEvt) {
557 <<
") larger than number of sampled events ("
558 << fSamplingEventList.at(fCurrentTreeIdx).size() <<
" of tree " << fCurrentTreeIdx <<
")" <<
Endl;
561 Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at( iEvt )->first;
564 weight /= fSamplingWeight.at(fCurrentTreeIdx);
565 if (weight > 1.0 ) weight = 1.0;
569 weight *= fSamplingWeight.at(fCurrentTreeIdx);
571 fSamplingEventList.at(fCurrentTreeIdx).at( iEvt )->first = weight;
589 SetCurrentType(type);
590 const UInt_t t = TreeIndex(type);
591 if (fResults.size() <=
t) {
593 <<
" found. Size=" << fResults.size() <<
Endl;
607 char *className =
new char[40];
613 for(
UInt_t i=0; i<fResults.at(t).size(); i++ )
614 metVals[i] =
new Float_t[fdsi.GetNTargets()+fdsi.GetNClasses()];
617 tree->
Branch(
"classID", &cls,
"classID/I" );
618 tree->
Branch(
"className",(
void*)className,
"className/C" );
622 for (std::vector<VariableInfo>::const_iterator itVars = fdsi.GetVariableInfos().begin();
623 itVars != fdsi.GetVariableInfos().end(); itVars++) {
626 tree->
Branch( (*itVars).GetInternalName(), &varVals[
n], (*itVars).GetInternalName()+
TString(
"/F") );
631 for (std::vector<VariableInfo>::const_iterator itTgts = fdsi.GetTargetInfos().begin();
632 itTgts != fdsi.GetTargetInfos().end(); itTgts++) {
634 tree->
Branch( (*itTgts).GetInternalName(), &tgtVals[
n], (*itTgts).GetInternalName()+
TString(
"/F") );
639 for (std::vector<VariableInfo>::const_iterator itVis = fdsi.GetSpectatorInfos().begin();
640 itVis != fdsi.GetSpectatorInfos().end(); itVis++) {
642 tree->
Branch( (*itVis).GetInternalName(), &visVals[
n], (*itVis).GetInternalName()+
TString(
"/F") );
646 tree->
Branch(
"weight", &weight,
"weight/F" );
650 for (std::map< TString, Results* >::iterator itMethod = fResults.at(t).begin();
651 itMethod != fResults.at(t).end(); itMethod++) {
659 tree->
Branch( itMethod->first, &(metVals[n][0]), itMethod->first +
"/F" );
664 for (
UInt_t iCls = 0; iCls < fdsi.GetNClasses(); iCls++) {
665 if (iCls > 0) leafList.
Append(
":" );
666 leafList.
Append( fdsi.GetClassInfo( iCls )->GetName() );
669 Log() <<
kDEBUG <<
"itMethod->first " << itMethod->first <<
" LEAFLIST: "
670 << leafList <<
" itMethod->second " << itMethod->second <<
Endl;
671 tree->
Branch( itMethod->first, (metVals[n]), leafList );
676 for (
UInt_t iTgt = 0; iTgt < fdsi.GetNTargets(); iTgt++) {
677 if (iTgt > 0) leafList.
Append(
":" );
678 leafList.
Append( fdsi.GetTargetInfo( iTgt ).GetInternalName() );
682 Log() <<
kDEBUG <<
"itMethod->first " << itMethod->first <<
" LEAFLIST: "
683 << leafList <<
" itMethod->second " << itMethod->second <<
Endl;
684 tree->
Branch( itMethod->first, (metVals[n]), leafList );
687 Log() <<
kWARNING <<
"Unknown analysis type for result found when writing TestTree." <<
Endl;
694 for (
Long64_t iEvt = 0; iEvt < GetNEvents( type ); iEvt++) {
696 const Event* ev = GetEvent( iEvt );
700 TString tmp = fdsi.GetClassInfo( cls )->GetName();
701 for (
Int_t itmp = 0; itmp < tmp.
Sizeof(); itmp++) {
702 className[itmp] = tmp(itmp);
703 className[itmp+1] = 0;
714 for (std::map<TString, Results*>::iterator itMethod = fResults.at(t).begin();
715 itMethod != fResults.at(t).end(); itMethod++) {
716 Results* results = itMethod->second;
718 const std::vector< Float_t >& vals = results->operator[](iEvt);
722 metVals[
n][0] = vals[0];
726 for (
UInt_t nCls = 0, nClsEnd=fdsi.GetNClasses(); nCls < nClsEnd; nCls++) {
728 metVals[
n][nCls] = val;
733 for (
UInt_t nTgts = 0; nTgts < fdsi.GetNTargets(); nTgts++) {
735 metVals[
n][nTgts] = val;
746 SetCurrentType(savedType);
752 for(
UInt_t i=0; i<fResults.at(t).size(); i++ )
UInt_t GetNSpectators() const
access the number of spectators through the datasetinfo
void SetEventCollection(std::vector< Event * > *, Types::ETreeType)
Sets the event collection (by DataSetFactory)
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
MsgLogger & Endl(MsgLogger &ml)
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
virtual Int_t Fill()
Fill all branches.
TRandom3 * fSamplingRandom
void SetTreeType(Types::ETreeType type)
std::vector< Char_t > fBlockBelongToTraining
void ClearNClassEvents(Int_t type)
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not...
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
const char * Data() const
virtual ~DataSet()
destructor
TTree * GetTree(Types::ETreeType type)
create the test/training tree with all the variables, the weights, the classes, the targets...
TString & Append(const char *cs)
std::vector< std::vector< double > > Data
UInt_t GetNVariables() const
accessor to the number of variables
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
void ApplyTrainingSetDivision()
apply division of data set
UInt_t GetNSpectators() const
accessor to the number of spectators
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
std::vector< std::vector< Long64_t > > fClassEvents
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance (here apparently called resultsName i...
void DivideTrainingSet(UInt_t blockNum)
divide training set
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
const TString & GetName() const
virtual const char * GetName() const
Returns name of object.
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
void CreateSampling() const
create an event sampling (random or importance sampling)
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
const Event * GetEvent() const
std::vector< Char_t > fSampling
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
std::vector< Float_t > fSamplingWeight
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
UInt_t GetNTargets() const
accessor to the number of targets
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
std::vector< Int_t > fSamplingNEvents
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Float_t GetTarget(UInt_t itgt) const
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
#define dest(otri, vertexptr)
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
TString()
TString default ctor.
virtual Long64_t GetEntries() const
A TTree object has a header with a name and a title.
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling