64 fHasNegativeEventWeights(
kFALSE),
96 fHasNegativeEventWeights(
kFALSE),
130 fBlockBelongToTraining.clear();
132 for (std::vector< std::map< TString, Results* > >::iterator it = fResults.begin(); it != fResults.end(); ++it) {
134 delete itMap->second;
139 if (fSamplingRandom != 0 )
delete fSamplingRandom;
153 if (fClassEvents.size()<(
UInt_t)(
type+1)) fClassEvents.resize(
type+1 );
162 if (fClassEvents.size()<(
UInt_t)(
type+1)) fClassEvents.resize(
type+1 );
163 fClassEvents.at(
type ).clear();
173 catch (std::out_of_range &) {
175 Log() << kFATAL <<
TString::Format(
"Dataset[%s] : ",fdsi->GetName()) <<
"No " << (
type==0?
"training":(
type==1?
"testing":
"_unknown_type_"))
176 <<
" events for class " << (
ci==
NULL?
"_no_name_known_":
ci->GetName()) <<
" (index # "<<
classNumber<<
")"
177 <<
" available. Check if all class names are spelled correctly and if events are"
178 <<
" passing the selection cuts." <<
Endl;
181 Log() << kFATAL <<
TString::Format(
"Dataset[%s] : ",fdsi->GetName()) <<
"ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" <<
Endl;
192 if (i>=fEventCollection.size() || fEventCollection[i].size()==0)
return;
195 for (
UInt_t j=0;
j<fEventCollection[i].size();
j++)
delete fEventCollection[i][
j];
197 fEventCollection[i].clear();
204 if (fSampling.size() >
UInt_t(fCurrentTreeIdx) && fSampling.at(fCurrentTreeIdx)) {
205 Long64_t iEvt = fSamplingSelected.at(fCurrentTreeIdx).at( fCurrentEventIdx ).second;
206 return ((fEventCollection.at(fCurrentTreeIdx))).at(
iEvt);
209 return ((fEventCollection.at(fCurrentTreeIdx))).at(fCurrentEventIdx);
218 return fdsi->GetNVariables();
226 return fdsi->GetNTargets();
234 return fdsi->GetNSpectators();
244 if (
ev->GetWeight()<0) fHasNegativeEventWeights =
kTRUE;
255 ClearNClassEvents(
type );
257 fEventCollection.at(t) = *events;
258 for (std::vector<Event*>::iterator it = fEventCollection.at(t).begin(); it < fEventCollection.at(t).end(); ++it) {
259 IncrementNClassEvents( t, (*it)->GetClass() );
270 if (t<fResults.size()) {
271 const std::map< TString, Results* >&
resultsForType = fResults[t];
279 fResults.resize(t+1);
320 if (fResults.empty())
return;
323 Log()<<kFATAL<<
TString::Format(
"Dataset[%s] : ",fdsi->GetName()) <<
"you asked for an Treetype (training/testing/...)"
324 <<
" whose index " <<
type <<
" does not exist " <<
Endl;
329 Log() << kDEBUG <<
TString::Format(
"Dataset[%s] : ",fdsi->GetName()) <<
" Delete Results previous existing result:" <<
resultsName
336 <<
" of type " <<
type <<
" which I should have deleted" <<
Endl;
346 if (fResults.empty())
return;
349 Log()<<kFATAL<<
TString::Format(
"Dataset[%s] : ",fdsi->GetName()) <<
"you asked for an Treetype (training/testing/...)"
350 <<
" whose index " <<
type <<
" does not exist " <<
Endl;
359 <<
" DeleteAllResults previous existing result: "
375 if (fBlockBelongToTraining.size() ==
blockNum)
return;
377 if (fBlockBelongToTraining.size() == 1) {
378 if (fEventCollection[
tOrg].
size() == 0)
379 fEventCollection[
tOrg].resize(fEventCollection[
tTrn].
size());
380 fEventCollection[
tOrg].clear();
381 for (
UInt_t i=0; i<fEventCollection[
tTrn].size(); i++)
382 fEventCollection[
tOrg].push_back(fEventCollection[
tTrn][i]);
383 fClassEvents[
tOrg] = fClassEvents[
tTrn];
386 fBlockBelongToTraining.clear();
389 ApplyTrainingSetDivision();
398 fEventCollection[
tTrn].clear();
399 if (fEventCollection[
tVld].
size()==0)
400 fEventCollection[
tVld].resize(fEventCollection[
tOrg].
size());
401 fEventCollection[
tVld].clear();
404 for (
UInt_t i=0; i<fEventCollection[
tOrg].size(); i++) {
405 if (fBlockBelongToTraining[i % fBlockBelongToTraining.size()])
406 fEventCollection[
tTrn].push_back(fEventCollection[
tOrg][i]);
408 fEventCollection[
tVld].push_back(fEventCollection[
tOrg][i]);
429 return GetNClassEvents(
Types::kTesting, fdsi->GetClassInfo(
"Signal")->GetNumber() );
437 return GetNClassEvents(
Types::kTesting, fdsi->GetClassInfo(
"Background")->GetNumber() );
445 return GetNClassEvents(
Types::kTraining, fdsi->GetClassInfo(
"Signal")->GetNumber() );
453 return GetNClassEvents(
Types::kTraining, fdsi->GetClassInfo(
"Background")->GetNumber() );
462 if (fSamplingRandom == 0 ) fSamplingRandom =
new TRandom3( seed );
465 std::vector< std::pair< Float_t, Long64_t >* >
evtList;
472 fSamplingEventList.at(
treeIdx).clear();
473 fSamplingSelected.at(
treeIdx).clear();
480 fSampling.at(
treeIdx ) =
false;
481 fSamplingNEvents.at(
treeIdx ) = 0;
482 fSamplingWeight.at(
treeIdx ) = 1.0;
487 fSampling.at(
treeIdx ) =
false;
490 fSamplingWeight.at(
treeIdx ) = weight;
494 fSamplingSelected.at(
treeIdx ).reserve( fSamplingNEvents.at(
treeIdx) );
496 std::pair<Float_t,Long64_t>
p(1.0,
ievt);
497 fSamplingEventList.at(
treeIdx ).push_back(
p );
501 fSampling.at(
treeIdx ) =
true;
512 if (!fSampling.at(
treeIdx) )
return;
514 if (fSamplingRandom == 0 )
516 <<
"no random generator present for creating a random/importance sampling (initialized?)" <<
Endl;
519 fSamplingSelected.at(
treeIdx).clear();
522 std::vector< std::pair< Float_t, Long64_t > >
evtList;
523 std::vector< std::pair< Float_t, Long64_t > >::iterator
evtListIt;
538 std::vector< Float_t >
rnds;
542 for (
Int_t i = 0; i < fSamplingNEvents.at(
treeIdx); i++) {
544 rnds.push_back( pos );
575 if (!fSampling.at(fCurrentTreeIdx))
return;
576 if (fSamplingWeight.at(fCurrentTreeIdx) > 0.99999999999)
return;
579 Long64_t stop = fSamplingEventList.at(fCurrentTreeIdx).size() -1;
585 if (
Long64_t(fSamplingEventList.at(fCurrentTreeIdx).size()) <
iEvt) {
586 Log() << kWARNING <<
TString::Format(
"Dataset[%s] : ",fdsi->GetName()) <<
"event number (" <<
iEvt
587 <<
") larger than number of sampled events ("
588 << fSamplingEventList.at(fCurrentTreeIdx).size() <<
" of tree " << fCurrentTreeIdx <<
")" <<
Endl;
591 Float_t weight = fSamplingEventList.at(fCurrentTreeIdx).at(
iEvt ).first;
594 weight /= fSamplingWeight.at(fCurrentTreeIdx);
595 if (weight > 1.0 ) weight = 1.0;
599 weight *= fSamplingWeight.at(fCurrentTreeIdx);
601 fSamplingEventList.at(fCurrentTreeIdx).at(
iEvt ).first = weight;
619 SetCurrentType(
type);
621 if (fResults.size() <= t) {
623 <<
" found. Size=" << fResults.size() <<
Endl;
643 for(
UInt_t i=0; i<fResults.at(t).
size(); i++ )
644 metVals[i] =
new Float_t[fdsi->GetNTargets()+fdsi->GetNClasses()];
647 tree->Branch(
"classID", &
cls,
"classID/I" );
648 tree->Branch(
"className", className,
"className/C" );
653 Int_t arraySize = -1;
654 for (std::vector<VariableInfo>::const_iterator
itVars = fdsi->GetVariableInfos().
begin();
659 tree->Branch( (*itVars).GetInternalName(), &
varVals[
n], (*itVars).GetInternalName()+
TString(
"/F") );
664 name.ReplaceAll(
"[0]",
"");
665 arraySize = fdsi->GetVarArraySize((*itVars).GetExpression());
667 Log() << kDEBUG <<
"creating branch for array " <<
name <<
" with size " << arraySize <<
Endl;
677 for (std::vector<VariableInfo>::const_iterator
itTgts = fdsi->GetTargetInfos().
begin();
680 tree->Branch( (*itTgts).GetInternalName(), &
tgtVals[
n], (*itTgts).GetInternalName()+
TString(
"/F") );
685 for (std::vector<VariableInfo>::const_iterator
itVis = fdsi->GetSpectatorInfos().
begin();
688 tree->Branch( (*itVis).GetInternalName(), &
visVals[
n], (*itVis).GetInternalName()+
TString(
"/F") );
692 tree->Branch(
"weight", &weight,
"weight/F" );
696 for (std::map< TString, Results* >::iterator
itMethod = fResults.at(t).
begin();
713 leafList.Append( fdsi->GetClassInfo(
iCls )->GetName() );
716 Log() << kDEBUG <<
TString::Format(
"Dataset[%s] : ",fdsi->GetName()) <<
"itMethod->first " <<
itMethod->first <<
" LEAFLIST: "
725 leafList.Append( fdsi->GetTargetInfo(
iTgt ).GetInternalName() );
729 Log() << kDEBUG <<
TString::Format(
"Dataset[%s] : ",fdsi->GetName()) <<
"itMethod->first " <<
itMethod->first <<
" LEAFLIST: "
734 Log() << kWARNING <<
TString::Format(
"Dataset[%s] : ",fdsi->GetName()) <<
"Unknown analysis type for result found when writing TestTree." <<
Endl;
741 for (
auto &&
itMethod : fResults.at(t)) {
747 auto analysisType =
results->GetAnalysisType();
755 Log() << kFATAL <<
"Unexpected analysisType." <<
Endl;
759 Log() << kFATAL <<
"An error occurred in DataSet::GetTree. "
760 "Inconsistent size of result for result with name '"
773 cls =
ev->GetClass();
774 weight =
ev->GetWeight();
775 strlcpy(className, fdsi->GetClassInfo(
cls )->GetName(),
sizeof(className));
785 for (
auto &&
itMethod : fResults.at(t)) {
787 auto analysisType =
results.GetAnalysisType();
811 <<
"Created tree '" << tree->GetName() <<
"' with " << tree->GetEntries() <<
" events" <<
Endl <<
Endl;
819 for(
UInt_t i=0; i<fResults.at(t).
size(); i++ )
823 tree->ResetBranchAddresses();
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t dest
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
const_iterator begin() const
const_iterator end() const
Class that contains all the information of a class.
Class that contains all the data information.
void DivideTrainingSet(UInt_t blockNum)
divide training set
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
std::vector< Char_t > fSampling
random or importance sampling (not all events are taken) !! Bool_t are stored ( no std::vector<bool> ...
std::vector< Float_t > fSamplingWeight
weight change factor [weight is indicating if sampling is random (1.0) or importance (<1....
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
void ClearNClassEvents(Int_t type)
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
void SetEventCollection(std::vector< Event * > *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
TTree * GetTree(Types::ETreeType type)
create the test/training tree with all the variables, the weights, the classes, the targets,...
const Event * GetEvent() const
returns event without transformations
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
std::vector< Char_t > fBlockBelongToTraining
when dividing the dataset into blocks, sets whether a given block is in the Training set or else in...
UInt_t GetNSpectators() const
access the number of spectators through the datasetinfo
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
std::vector< Int_t > fSamplingNEvents
number of events which should be sampled
virtual ~DataSet()
destructor
std::vector< std::vector< Long64_t > > fClassEvents
number of events of class 0,1,2,... in training[0] and testing[1] (+validation, trainingoriginal)
void DeleteAllResults(Types::ETreeType type, Types::EAnalysisType analysistype)
Deletes all results currently in the dataset.
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
void CreateSampling() const
create an event sampling (random or importance sampling)
TRandom3 * fSamplingRandom
-> random generator for sampling
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
void ApplyTrainingSetDivision()
apply division of data set
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
ostringstream derivative to redirect and format output
Class that is the base-class for a vector of results.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of results.
Class that is the base-class for a vector of results.
@ kTrainingOriginal
ever needed
@ kValidation
these are placeholders... currently not used, but could be moved "forward" if
The TNamed class is the base class for all named ROOT classes.
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
A TTree represents a columnar dataset.
MsgLogger & Endl(MsgLogger &ml)