81 for (
Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
113 for (
Int_t treeIdx = 0; treeIdx < treeNum; treeIdx++) {
132 for (std::vector< std::map< TString, Results* > >::iterator it =
fResults.begin(); it !=
fResults.end(); ++it) {
133 for (std::map< TString, Results* >::iterator itMap = (*it).begin(); itMap != (*it).end(); ++itMap) {
134 delete itMap->second;
173 catch (std::out_of_range &) {
175 Log() << kFATAL <<
TString::Format(
"Dataset[%s] : ",
fdsi->GetName()) <<
"No " << (type==0?
"training":(type==1?
"testing":
"_unknown_type_"))
176 <<
" events for class " << (ci==NULL?
"_no_name_known_":ci->
GetName()) <<
" (index # "<<classNumber<<
")"
177 <<
" available. Check if all class names are spelled correctly and if events are"
178 <<
" passing the selection cuts." <<
Endl;
181 Log() << kFATAL <<
TString::Format(
"Dataset[%s] : ",
fdsi->GetName()) <<
"ERROR/CAUGHT : DataSet/GetNClassEvents, .. unknown error" <<
Endl;
218 return fdsi->GetNVariables();
226 return fdsi->GetNTargets();
234 return fdsi->GetNSpectators();
271 const std::map< TString, Results* >& resultsForType =
fResults[t];
272 std::map< TString, Results* >::const_iterator it = resultsForType.find(resultsName);
273 if (it!=resultsForType.end()) {
285 switch(analysistype) {
305 fResults[t][resultsName] = newresults;
323 Log()<<kFATAL<<
TString::Format(
"Dataset[%s] : ",
fdsi->GetName()) <<
"you asked for an Treetype (training/testing/...)"
324 <<
" whose index " << type <<
" does not exist " <<
Endl;
326 std::map< TString, Results* >& resultsForType =
fResults[
UInt_t(type)];
327 std::map< TString, Results* >::iterator it = resultsForType.find(resultsName);
328 if (it!=resultsForType.end()) {
329 Log() << kDEBUG <<
TString::Format(
"Dataset[%s] : ",
fdsi->GetName()) <<
" Delete Results previous existing result:" << resultsName
330 <<
" of type " << type <<
Endl;
332 resultsForType.erase(it->first);
335 Log() << kINFO <<
TString::Format(
"Dataset[%s] : ",
fdsi->GetName()) <<
"could not fine Result class of " << resultsName
336 <<
" of type " << type <<
" which I should have deleted" <<
Endl;
349 Log()<<kFATAL<<
TString::Format(
"Dataset[%s] : ",
fdsi->GetName()) <<
"you asked for an Treetype (training/testing/...)"
350 <<
" whose index " << type <<
" does not exist " <<
Endl;
353 std::map<TString, Results *> & resultsForType =
fResults[
UInt_t(type)];
355 for (
auto && it : resultsForType) {
356 auto & resultsName = it.first;
359 <<
" DeleteAllResults previous existing result: "
360 << resultsName <<
" of type " << type <<
Endl;
365 resultsForType.clear();
465 std::vector< std::pair< Float_t, Long64_t >* > evtList;
479 if (fraction > 0.999999 || fraction < 0.0000001) {
495 for (
Long64_t ievt=0; ievt<nEvts; ievt++) {
496 std::pair<Float_t,Long64_t> p(1.0,ievt);
516 <<
"no random generator present for creating a random/importance sampling (initialized?)" <<
Endl;
522 std::vector< std::pair< Float_t, Long64_t > > evtList;
523 std::vector< std::pair< Float_t, Long64_t > >::iterator evtListIt;
532 for (evtListIt = evtList.begin(); evtListIt != evtList.end(); ++evtListIt) {
533 sumWeights += (*evtListIt).first;
535 evtListIt = evtList.begin();
538 std::vector< Float_t > rnds;
544 rnds.push_back( pos );
548 std::sort(rnds.begin(),rnds.end());
551 std::vector< Float_t >::iterator rndsIt = rnds.begin();
552 Float_t runningSum = 0.000000001;
553 for (evtListIt = evtList.begin(); evtListIt != evtList.end();) {
554 runningSum += (*evtListIt).first;
555 if (runningSum >= (*rndsIt)) {
557 evtListIt = evtList.erase( evtListIt );
560 if (rndsIt == rnds.end() )
break;
580 if (evtNumber >= 0) {
587 <<
") larger than number of sampled events ("
595 if (weight > 1.0 ) weight = 1.0;
647 tree->
Branch(
"classID", &cls,
"classID/I" );
648 tree->
Branch(
"className", className,
"className/C" );
652 Int_t ivar_array = 0;
653 Int_t arraySize = -1;
654 for (std::vector<VariableInfo>::const_iterator itVars =
fdsi->GetVariableInfos().begin();
655 itVars !=
fdsi->GetVariableInfos().end(); ++itVars) {
659 tree->
Branch( (*itVars).GetInternalName(), &varVals[
n], (*itVars).GetInternalName()+
TString(
"/F") );
662 if (ivar_array == 0) {
664 name.ReplaceAll(
"[0]",
"");
665 arraySize =
fdsi->GetVarArraySize((*itVars).GetExpression());
667 Log() << kDEBUG <<
"creating branch for array " <<
name <<
" with size " << arraySize <<
Endl;
670 if (ivar_array == arraySize)
677 for (std::vector<VariableInfo>::const_iterator itTgts =
fdsi->GetTargetInfos().begin();
678 itTgts !=
fdsi->GetTargetInfos().end(); ++itTgts) {
680 tree->
Branch( (*itTgts).GetInternalName(), &tgtVals[
n], (*itTgts).GetInternalName()+
TString(
"/F") );
685 for (std::vector<VariableInfo>::const_iterator itVis =
fdsi->GetSpectatorInfos().begin();
686 itVis !=
fdsi->GetSpectatorInfos().end(); ++itVis) {
688 tree->
Branch( (*itVis).GetInternalName(), &visVals[
n], (*itVis).GetInternalName()+
TString(
"/F") );
692 tree->
Branch(
"weight", &weight,
"weight/F" );
696 for (std::map< TString, Results* >::iterator itMethod =
fResults.at(t).begin();
697 itMethod !=
fResults.at(t).end(); ++itMethod) {
701 <<
"analysis type: " << (itMethod->second->GetAnalysisType()==
Types::kRegression ?
"Regression" :
706 tree->
Branch( itMethod->first, &(metVals[
n][0]), itMethod->first +
"/F" );
711 for (
UInt_t iCls = 0; iCls <
fdsi->GetNClasses(); iCls++) {
712 if (iCls > 0) leafList.
Append(
":" );
713 leafList.
Append(
fdsi->GetClassInfo( iCls )->GetName() );
716 Log() << kDEBUG <<
TString::Format(
"Dataset[%s] : ",
fdsi->GetName()) <<
"itMethod->first " << itMethod->first <<
" LEAFLIST: "
717 << leafList <<
" itMethod->second " << itMethod->second <<
Endl;
718 tree->
Branch( itMethod->first, (metVals[
n]), leafList );
723 for (
UInt_t iTgt = 0; iTgt <
fdsi->GetNTargets(); iTgt++) {
724 if (iTgt > 0) leafList.
Append(
":" );
725 leafList.
Append(
fdsi->GetTargetInfo( iTgt ).GetInternalName() );
729 Log() << kDEBUG <<
TString::Format(
"Dataset[%s] : ",
fdsi->GetName()) <<
"itMethod->first " << itMethod->first <<
" LEAFLIST: "
730 << leafList <<
" itMethod->second " << itMethod->second <<
Endl;
731 tree->
Branch( itMethod->first, (metVals[
n]), leafList );
734 Log() << kWARNING <<
TString::Format(
"Dataset[%s] : ",
fdsi->GetName()) <<
"Unknown analysis type for result found when writing TestTree." <<
Endl;
741 for (
auto && itMethod :
fResults.at(t)) {
743 auto results = itMethod.second;
744 auto resultsName = itMethod.first;
747 auto analysisType = results->GetAnalysisType();
755 Log() << kFATAL <<
"Unexpected analysisType." <<
Endl;
758 if (numEventsResults != numEvents) {
759 Log() << kFATAL <<
"An error occurred in DataSet::GetTree. "
760 "Inconsistent size of result for result with name '"
761 << resultsName <<
"'."
762 <<
" Size is '" << std::to_string(numEventsResults)
764 <<
" Expected '" << numEvents <<
"'." <<
Endl;
775 strlcpy(className,
fdsi->GetClassInfo( cls )->GetName(),
sizeof(className));
785 for (
auto && itMethod :
fResults.at(t)) {
786 auto & results = *itMethod.second;
787 auto analysisType = results.GetAnalysisType();
789 auto const & vals = results[iEvt];
792 metVals[iMethod][0] = vals[0];
794 for (
UInt_t nCls = 0; nCls <
fdsi->GetNClasses(); nCls++) {
796 metVals[iMethod][nCls] = val;
799 for (
UInt_t nTgts = 0; nTgts <
fdsi->GetNTargets(); nTgts++) {
801 metVals[iMethod][nTgts] = val;
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
int Int_t
Signed integer 4 bytes (int).
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
bool Bool_t
Boolean (0=false, 1=true) (bool).
long long Long64_t
Portable signed long integer 8 bytes.
float Float_t
Float 4 bytes (float).
Class that contains all the information of a class.
Class that contains all the data information.
void DivideTrainingSet(UInt_t blockNum)
divide training set
const DataSetInfo * fdsi
-> datasetinfo that created this dataset
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
std::vector< Char_t > fSampling
random or importance sampling (not all events are taken); the Bool_t flags are stored as Char_t rather than in a std::vector<bool>
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingEventList
weights and indices for sampling
std::vector< Float_t > fSamplingWeight
weight change factor [the weight indicates whether sampling is random (1.0) or importance (<1.0)]
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
void ClearNClassEvents(Int_t type)
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when successful
std::vector< std::map< TString, Results * > > fResults
! [train/test/...][method-identifier]
void SetEventCollection(std::vector< Event * > *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory).
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets,...
const Event * GetEvent() const
returns event without transformations
Types::ETreeType GetCurrentType() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Long64_t fCurrentEventIdx
std::vector< Char_t > fBlockBelongToTraining
when dividing the dataset to blocks, sets whether the certain block is in the Training set or else in...
MsgLogger * fLogger
! message logger
UInt_t GetNSpectators() const
access the number of spectators through the datasetinfo
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
std::vector< Int_t > fSamplingNEvents
number of events which should be sampled
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingSelected
selected events
virtual ~DataSet()
destructor
std::vector< std::vector< Long64_t > > fClassEvents
number of events of class 0,1,2,... in training[0] and testing[1] (+validation, trainingoriginal)
void DeleteAllResults(Types::ETreeType type, Types::EAnalysisType analysistype)
Deletes all results currently in the dataset.
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
UInt_t TreeIndex(Types::ETreeType type) const
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
Bool_t fHasNegativeEventWeights
true if at least one signal or bkg event has negative weight
Long64_t fTrainingBlockSize
block size into which the training dataset is divided
void CreateSampling() const
create an event sampling (random or importance sampling)
std::vector< std::vector< Event * > > fEventCollection
list of events for training/testing/...
void SetCurrentType(Types::ETreeType type) const
TRandom3 * fSamplingRandom
-> random generator for sampling
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
void ApplyTrainingSetDivision()
apply division of data set
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
UInt_t GetNSpectators() const
accessor to the number of spectators
UInt_t GetNVariables() const
accessor to the number of variables
UInt_t GetNTargets() const
accessor to the number of targets
Double_t GetWeight() const
return the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Float_t GetTarget(UInt_t itgt) const
ostringstream derivative to redirect and format output
Class that is the base class for a vector of results.
Class which takes the results of a multiclass classification.
Class that is the base class for a vector of results.
Class that is the base class for a vector of results.
void SetTreeType(Types::ETreeType type)
@ kTrainingOriginal
placeholder; currently not used, but could be moved "forward" if ever needed
@ kValidation
placeholder; currently not used, but could be moved "forward" if ever needed
const char * GetName() const override
Returns name of object.
Random number generator class based on the Mersenne Twister generator by M. Matsumoto and T. Nishimura.
TString & Append(const char *cs)
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
A TTree represents a columnar dataset.
virtual Int_t Fill()
Fill all branches.
virtual Long64_t GetEntries() const
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
virtual void ResetBranchAddresses()
Tell all of our branches to drop their current objects and allocate new ones.
MsgLogger & Endl(MsgLogger &ml)