73using std::setiosflags, std::ios;
96 fScaleWithPreselEff(0),
100 fLogger( new
MsgLogger(
"DataSetFactory", kINFO) )
// Delete the TTreeFormula objects referenced by each of the five formula
// containers. Null entries are skipped; these loops do not clear the
// containers themselves.
// NOTE(review): the enclosing function header is not visible in this extract.
109 std::vector<TTreeFormula*>::const_iterator formIt;
// Input-variable formulas.
111 for (formIt = fInputFormulas.begin() ; formIt!=fInputFormulas.end() ; ++formIt)
if (*formIt)
delete *formIt;
// Target formulas.
112 for (formIt = fTargetFormulas.begin() ; formIt!=fTargetFormulas.end() ; ++formIt)
if (*formIt)
delete *formIt;
// Cut formulas.
113 for (formIt = fCutFormulas.begin() ; formIt!=fCutFormulas.end() ; ++formIt)
if (*formIt)
delete *formIt;
// Weight formulas.
114 for (formIt = fWeightFormula.begin() ; formIt!=fWeightFormula.end() ; ++formIt)
if (*formIt)
delete *formIt;
// Spectator formulas.
115 for (formIt = fSpectatorFormulas.begin(); formIt!=fSpectatorFormulas.end(); ++formIt)
if (*formIt)
delete *formIt;
127 DataSet * ds = BuildInitialDataSet( dsi, dataInput );
129 if (ds->
GetNEvents() > 1 && fComputeCorrelations ) {
151 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"Build DataSet consisting of one Event with dynamically changing variables" <<
Endl;
// Heap-allocated list of Float_t pointers; it is filled below and then handed
// to the Event constructor.
161 std::vector<Float_t*>* evdyn =
new std::vector<Float_t*>(0);
// Without any variable description there is nothing to build: report kFATAL.
165 if (varinfos.empty())
166 Log() << kFATAL <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"Dynamic data set cannot be built, since no variable informations are present. Apparently no variables have been set. This should not happen, please contact the TMVA authors." <<
Endl;
// Walk over every VariableInfo entry.
168 std::vector<VariableInfo>::iterator it = varinfos.begin(), itEnd=varinfos.end();
169 for (;it!=itEnd;++it) {
// NOTE(review): the `if` that pairs with the `else` below, and the definition
// of `external`, are not visible in this extract. Judging by the message this
// branch handles a NULL external link - confirm against the full file.
172 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"The link to the external variable is NULL while I am trying to build a dynamic data set. In this case fTmpEvent from MethodBase HAS TO BE USED in the method to get useful values in variables." <<
Endl;
173 else evdyn->push_back (external);
// Spectators: append each external link (cast to Float_t*) to the same pointer
// list and record the spectator's variable type.
// NOTE(review): the later use of spectatorTypes is not visible in this extract.
177 std::vector<char> spectatorTypes;
178 spectatorTypes.reserve(spectatorinfos.size());
179 for (
auto &&info: spectatorinfos) {
180 evdyn->push_back( (
Float_t*)info.GetExternalLink() );
181 spectatorTypes.push_back(info.GetVarType());
// Build one Event from the pointer list; the second argument is the number of
// (non-spectator) variables.
184 TMVA::Event * ev =
new Event((
const std::vector<Float_t*>*&)evdyn, varinfos.size());
// Wrap the single event in a freshly allocated event vector.
186 std::vector<Event *> *newEventVector =
new std::vector<Event *>;
187 newEventVector->push_back(ev);
// Only the vector container is deleted here.
// NOTE(review): what is done with newEventVector between the push_back above
// and this delete is not visible in this extract.
193 delete newEventVector;
205 if (dataInput.
GetEntries()==0)
return BuildDynamicDataSet( dsi );
210 std::vector< TString >* classList = dataInput.
GetClassList();
211 for (std::vector<TString>::iterator it = classList->begin(); it< classList->end(); ++it) {
222 InitOptions( dsi, eventCounts, normMode, splitSeed, splitMode , mixMode );
225 BuildEventVector( dsi, dataInput, tmpEventVector, eventCounts );
227 DataSet* ds = MixEvents( dsi, tmpEventVector, eventCounts,
228 splitMode, mixMode, normMode, splitSeed );
231 if (showCollectedOutput) {
233 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"Collected:" <<
Endl;
235 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
" "
238 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
" "
258 Log() << kFATAL <<
"Expression " << expression.
Data()
259 <<
" could not be resolved to a valid formula. " <<
Endl;
261 Log() << kWARNING <<
"Expression: " << expression.
Data()
262 <<
" does not provide data for this event. "
263 <<
"This event is not taken into account. --> please check if you use as a variable "
264 <<
"an entry of an array which is not filled for some events "
265 <<
"(e.g. arr[4] when arr has only 3 elements)." <<
Endl;
266 Log() << kWARNING <<
"If you want to take the event into account you can do something like: "
267 <<
"\"Alt$(arr[4],0)\" where in cases where arr doesn't have a 4th element, "
268 <<
" 0 is taken as an alternative." <<
Endl;
275 for (
int i = 0, iEnd = ttf->
GetNcodes (); i < iEnd; ++i)
303 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
" create input formulas for tree " << tr->
GetName() <<
Endl;
// Discard the input formulas that are currently stored: delete every non-null
// entry, then empty both the plain formula list and the (formula, index) table.
304 std::vector<TTreeFormula*>::const_iterator formIt, formItEnd;
305 for (formIt = fInputFormulas.begin(), formItEnd=fInputFormulas.end(); formIt!=formItEnd; ++formIt)
if (*formIt)
delete *formIt;
306 fInputFormulas.clear();
308 fInputTableFormulas.clear();
// Bookkeeping for array-type variables; firstArrayVarIndex == -1 is the
// "no index recorded" state it is reset to further down.
// NOTE(review): the loop over variables and the conditions selecting the
// branches below are not visible in this extract.
310 bool firstArrayVar =
kTRUE;
311 int firstArrayVarIndex = -1;
// Here the formula is stored and paired with index 0 in the table.
320 fInputFormulas.emplace_back(ttf);
321 fInputTableFormulas.emplace_back(std::make_pair(ttf, (
Int_t) 0));
// Here the formula is stored and i is recorded as the first array index.
330 fInputFormulas.push_back(ttf);
334 firstArrayVarIndex = i;
// Table entry pairs the formula with the offset of i from the recorded first
// index.
339 fInputTableFormulas.push_back(std::make_pair(ttf, (
Int_t) i-firstArrayVarIndex));
// Once the offset reaches arraySize-1 the bookkeeping is reset to its initial
// values.
340 if (
int(i)-firstArrayVarIndex == arraySize-1 ) {
342 firstArrayVar =
kTRUE;
343 firstArrayVarIndex = -1;
// The four sections below follow the same pattern: log, delete every non-null
// formula currently stored, clear the container, then store new formulas.
// NOTE(review): the statements creating `ttf` (except for the weight formula)
// and the surrounding loops/conditions are not visible in this extract.

// --- Regression targets ---
353 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"transform regression targets" <<
Endl;
354 for (formIt = fTargetFormulas.begin(), formItEnd = fTargetFormulas.end(); formIt!=formItEnd; ++formIt)
if (*formIt)
delete *formIt;
355 fTargetFormulas.clear();
360 fTargetFormulas.push_back( ttf );
// --- Spectator variables ---
366 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"transform spectator variables" <<
Endl;
367 for (formIt = fSpectatorFormulas.begin(), formItEnd = fSpectatorFormulas.end(); formIt!=formItEnd; ++formIt)
if (*formIt)
delete *formIt;
368 fSpectatorFormulas.clear();
373 fSpectatorFormulas.push_back( ttf );
// --- Cuts ---
379 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"transform cuts" <<
Endl;
380 for (formIt = fCutFormulas.begin(), formItEnd = fCutFormulas.end(); formIt!=formItEnd; ++formIt)
if (*formIt)
delete *formIt;
381 fCutFormulas.clear();
// The cut formula is checked with CheckTTreeFormula before being stored.
// NOTE(review): how `worked` is acted upon is not visible in this extract.
388 Bool_t worked = CheckTTreeFormula( ttf, tmpCutExp, hasDollar );
394 fCutFormulas.push_back( ttf );
// --- Weights ---
400 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"transform weights" <<
Endl;
401 for (formIt = fWeightFormula.begin(), formItEnd = fWeightFormula.end(); formIt!=formItEnd; ++formIt)
if (*formIt)
delete *formIt;
402 fWeightFormula.clear();
// A null entry can be stored in the weight list (the condition is not visible
// here), which is why consumers of fWeightFormula must tolerate null pointers.
407 fWeightFormula.push_back( 0 );
// Otherwise build the weight formula from the expression tmpWeight on tree tr,
// check it, and store it.
413 ttf =
new TTreeFormula(
"FormulaWeight", tmpWeight, tr );
414 Bool_t worked = CheckTTreeFormula( ttf, tmpWeight, hasDollar );
423 fWeightFormula.push_back( ttf );
428 Log() << kDEBUG <<
Form(
"Dataset[%s] : ", dsi.
GetName()) <<
"enable branches" <<
Endl;
433 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"enable branches: input variables" <<
Endl;
435 for (formIt = fInputFormulas.begin(); formIt!=fInputFormulas.end(); ++formIt) {
442 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"enable branches: targets" <<
Endl;
443 for (formIt = fTargetFormulas.begin(); formIt!=fTargetFormulas.end(); ++formIt) {
449 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"enable branches: spectators" <<
Endl;
450 for (formIt = fSpectatorFormulas.begin(); formIt!=fSpectatorFormulas.end(); ++formIt) {
456 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"enable branches: cuts" <<
Endl;
457 for (formIt = fCutFormulas.begin(); formIt!=fCutFormulas.end(); ++formIt) {
464 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"enable branches: weights" <<
Endl;
465 for (formIt = fWeightFormula.begin(); formIt!=fWeightFormula.end(); ++formIt) {
472 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"tree initialized" <<
Endl;
// Seed every running minimum with FLT_MAX and every running maximum with
// -FLT_MAX so that the first value compared replaces both.
// Three parallel sets are kept: variables (min/max), targets (tgmin/tgmax)
// and spectators (vmin/vmax).
492 for (
UInt_t ivar=0; ivar<nvar ; ivar++) { min[ivar] = FLT_MAX; max[ivar] = -FLT_MAX; }
493 for (
UInt_t ivar=0; ivar<ntgts; ivar++) { tgmin[ivar] = FLT_MAX; tgmax[ivar] = -FLT_MAX; }
494 for (
UInt_t ivar=0; ivar<nvis; ivar++) { vmin[ivar] = FLT_MAX; vmax[ivar] = -FLT_MAX; }
// Update the running extrema with the current value v.
// NOTE(review): the event loop and the assignment of v are not visible in
// this extract.
500 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
502 if (
v<min[ivar]) min[ivar] =
v;
503 if (
v>max[ivar]) max[ivar] =
v;
// Same update for the targets.
505 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
507 if (
v<tgmin[itgt]) tgmin[itgt] =
v;
508 if (
v>tgmax[itgt]) tgmax[itgt] =
v;
// Same update for the spectators.
510 for (
UInt_t ivis=0; ivis<nvis; ivis++) {
512 if (
v<vmin[ivis]) vmin[ivis] =
v;
513 if (
v>vmax[ivis]) vmax[ivis] =
v;
// Post-processing per variable: detect a range (max - min) no larger than
// FLT_MIN. NOTE(review): the action taken in that case is not visible here.
517 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
520 if(
TMath::Abs(max[ivar]-min[ivar]) <= FLT_MIN )
// Same range check for the targets.
523 for (
UInt_t ivar=0; ivar<ntgts; ivar++) {
526 if(
TMath::Abs(tgmax[ivar]-tgmin[ivar]) <= FLT_MIN )
// Spectator post-processing loop (body not visible in this extract).
529 for (
UInt_t ivar=0; ivar<nvis; ivar++) {
// Start from the covariance matrix of the requested class and normalise it
// in place.
551 TMatrixD* mat = CalcCovarianceMatrix( ds, classNumber );
556 for (ivar=0; ivar<nvar; ivar++) {
557 for (jvar=0; jvar<nvar; jvar++) {
// d is the product of the two diagonal entries for this pair; a positive d
// allows dividing element (ivar, jvar) by sqrt(d).
// NOTE(review): any guard around this statement (e.g. skipping ivar == jvar)
// is not visible in this extract.
559 Double_t d = (*mat)(ivar, ivar)*(*mat)(jvar, jvar);
560 if (
d > 0) (*mat)(ivar, jvar) /= sqrt(
d);
// Otherwise a warning naming the pair and the value of d is logged and the
// element is set to 0 (the `else` keyword itself is not visible here).
562 Log() << kWARNING <<
Form(
"Dataset[%s] : ",
DataSetInfo().GetName())<<
"<GetCorrelationMatrix> Zero variances for variables "
563 <<
"(" << ivar <<
", " << jvar <<
") = " <<
d
565 (*mat)(ivar, jvar) = 0;
// Finally force the diagonal to exactly 1.
571 for (ivar=0; ivar<nvar; ivar++) (*mat)(ivar, ivar) = 1.0;
582 UInt_t ivar = 0, jvar = 0;
// Zero the second-moment accumulator mat2.
// NOTE(review): the matching reset of vec, and the declarations of vec, mat2
// and ic, are not visible in this extract.
589 for (ivar=0; ivar<nvar; ivar++) {
591 for (jvar=0; jvar<nvar; jvar++) mat2(ivar, jvar) = 0;
// Events that belong to a different class do not contribute.
599 if (ev->
GetClass() != classNumber )
continue;
// Accumulate weighted first moments in vec and weighted second moments in
// the diagonal and upper triangle of mat2 (xi, xj and weight are read on
// lines not shown).
604 for (ivar=0; ivar<nvar; ivar++) {
607 vec(ivar) += xi*weight;
608 mat2(ivar, ivar) += (xi*xi*weight);
610 for (jvar=ivar+1; jvar<nvar; jvar++) {
612 mat2(ivar, jvar) += (xi*xj*weight);
// Mirror the upper triangle into the lower one so mat2 is symmetric.
617 for (ivar=0; ivar<nvar; ivar++)
618 for (jvar=ivar+1; jvar<nvar; jvar++)
619 mat2(jvar, ivar) = mat2(ivar, jvar);
// Covariance: mat2/ic - vec_i*vec_j/ic^2.
// NOTE(review): ic is accumulated on lines not shown (presumably the sum of
// event weights - confirm); no guard against ic == 0 is visible here.
623 for (ivar=0; ivar<nvar; ivar++) {
624 for (jvar=0; jvar<nvar; jvar++) {
625 (*mat)(ivar, jvar) = mat2(ivar, jvar)/ic -
vec(ivar)*
vec(jvar)/(ic*ic);
647 splitSpecs.
SetConfigDescription(
"Configuration options given in the \"PrepareForTrainingAndTesting\" call; these options define the creation of the data sets used for training and expert validation by TMVA" );
649 splitMode =
"Random";
651 "Method of picking training and testing events (default: random)" );
656 mixMode =
"SameAsSplitMode";
658 "Method of mixing events of different classes into one dataset (default: SameAsSplitMode)" );
666 "Seed for random event shuffling" );
668 normMode =
"EqualNumEvents";
670 "Overall renormalisation of event-by-event weights used in the training (NumEvents: average weight of 1 per event, independently for signal and background; EqualNumEvents: average weight of 1 per event for signal, and sum of weights for background equal to sum of weights for signal)" );
675 splitSpecs.
DeclareOptionRef(fScaleWithPreselEff=
kFALSE,
"ScaleWithPreselEff",
"Scale the number of requested events by the eff. of the preselection cuts (or not)" );
686 splitSpecs.
DeclareOptionRef( nEventRequests.at(cl).nTrainingEventsRequested,
TString(
"nTrain_")+clName, titleTrain );
687 splitSpecs.
DeclareOptionRef( nEventRequests.at(cl).nTestingEventsRequested ,
TString(
"nTest_")+clName , titleTest );
688 splitSpecs.
DeclareOptionRef( nEventRequests.at(cl).TrainTestSplitRequested ,
TString(
"TrainTestSplit_")+clName , titleTest );
693 splitSpecs.
DeclareOptionRef( fVerboseLevel=
TString(
"Info"),
"VerboseLevel",
"VerboseLevel (Debug/Verbose/Info)" );
// Both correlation switches default to true and are exposed as the options
// "Correlations" (display) and "CalcCorrelations" (computation).
698 fCorrelations =
kTRUE;
699 splitSpecs.
DeclareOptionRef(fCorrelations,
"Correlations",
"Boolean to show correlation output (Default: true)");
700 fComputeCorrelations =
kTRUE;
701 splitSpecs.
DeclareOptionRef(fComputeCorrelations,
"CalcCorrelations",
"Compute correlations and also some variable statistics, e.g. min/max (Default: true )");
// Logger threshold: the VerboseLevel string checks run after the Verbose()
// check, so a matching level string is the one applied last.
707 if (Verbose()) fLogger->SetMinType( kVERBOSE );
708 if (fVerboseLevel.CompareTo(
"Debug") ==0) fLogger->SetMinType( kDEBUG );
709 if (fVerboseLevel.CompareTo(
"Verbose") ==0) fLogger->SetMinType( kVERBOSE );
710 if (fVerboseLevel.CompareTo(
"Info") ==0) fLogger->SetMinType( kINFO );
// NOTE(review): the start of the following log statement is not visible in
// this extract.
716 <<
"\tSplitmode is: \"" << splitMode <<
"\" the mixmode is: \"" << mixMode <<
"\"" <<
Endl;
// "SAMEASSPLITMODE" means: mix with the same mode used for splitting. Any
// other mix mode that differs from the split mode is only reported.
// NOTE(review): the comparison uses the upper-case spelling; presumably the
// option strings are upper-cased on lines not shown - confirm.
717 if (mixMode==
"SAMEASSPLITMODE") mixMode = splitMode;
718 else if (mixMode!=splitMode)
719 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"DataSet splitmode="<<splitMode
720 <<
" differs from mixmode="<<mixMode<<
Endl;
744 for (
size_t i=0; i<nclasses; i++) {
745 eventCounts[i].varAvLength =
new Float_t[nvars];
746 for (
UInt_t ivar=0; ivar<nvars; ivar++)
747 eventCounts[i].varAvLength[ivar] = 0;
757 std::map<TString, int> nanInfWarnings;
758 std::map<TString, int> nanInfErrors;
762 for (
UInt_t cl=0; cl<nclasses; cl++) {
766 EventStats& classEventCounts = eventCounts[cl];
780 std::vector<Float_t> vars(nvars);
781 std::vector<Float_t> tgts(ntgts);
782 std::vector<Float_t> vis(nvis);
792 ChangeToNewTree( currentInfo, dsi );
802 for (
Long64_t evtIdx = 0; evtIdx < nEvts; evtIdx++) {
809 ChangeToNewTree( currentInfo, dsi );
813 Int_t sizeOfArrays = 1;
814 Int_t prevArrExpr = 0;
826 for (
UInt_t ivar = 0; ivar < nvars; ivar++) {
829 auto inputFormula = fInputTableFormulas[ivar].first;
831 Int_t ndata = inputFormula->GetNdata();
834 if (ndata == 1)
continue;
835 haveAllArrayData =
kTRUE;
838 if (sizeOfArrays == 1) {
839 sizeOfArrays = ndata;
842 else if (sizeOfArrays!=ndata) {
843 Log() << kERROR <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"ERROR while preparing training and testing trees:" <<
Endl;
844 Log() <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
" multiple array-type expressions of different length were encountered" <<
Endl;
845 Log() <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
" location of error: event " << evtIdx
848 Log() <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
" expression " << inputFormula->GetTitle() <<
" has "
849 <<
Form(
"Dataset[%s] : ",dsi.
GetName()) << ndata <<
" entries, while" <<
Endl;
850 Log() <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
" expression " << fInputTableFormulas[prevArrExpr].first->GetTitle() <<
" has "
851 <<
Form(
"Dataset[%s] : ",dsi.
GetName())<< fInputTableFormulas[prevArrExpr].first->GetNdata() <<
" entries" <<
Endl;
852 Log() << kFATAL <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"Need to abort" <<
Endl;
857 for (
Int_t idata = 0; idata<sizeOfArrays; idata++) {
860 auto checkNanInf = [&](std::map<TString, int> &msgMap,
Float_t value,
const char *
what,
const char *formulaTitle) {
862 contains_NaN_or_inf =
kTRUE;
863 ++msgMap[
TString::Format(
"Dataset[%s] : %s expression resolves to indeterminate value (NaN): %s", dsi.
GetName(),
what, formulaTitle)];
865 contains_NaN_or_inf =
kTRUE;
866 ++msgMap[
TString::Format(
"Dataset[%s] : %s expression resolves to infinite value (+inf or -inf): %s", dsi.
GetName(),
what, formulaTitle)];
874 formula = fCutFormulas[cl];
880 checkNanInf(nanInfErrors, cutVal,
"Cut", formula->
GetTitle());
884 auto &nanMessages = cutVal < 0.5 ? nanInfWarnings : nanInfErrors;
887 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
888 auto formulaMap = fInputTableFormulas[ivar];
889 formula = formulaMap.first;
890 int inputVarIndex = formulaMap.second;
897 if (ndata < arraySize) {
899 <<
" in the current tree " << currentInfo.
GetTree()->
GetName() <<
" for the event " << evtIdx
900 <<
" is " << ndata <<
" instead of " << arraySize <<
Endl;
901 }
else if (ndata > arraySize && !foundLargerArraySize) {
903 <<
" in the current tree " << currentInfo.
GetTree()->
GetName() <<
" for the event "
904 << evtIdx <<
" is " << ndata <<
", larger than " << arraySize <<
Endl;
905 Log() << kWARNING <<
"Some data will then be ignored. This WARNING is printed only once, "
906 <<
" check in case for the other variables and events " <<
Endl;
908 foundLargerArraySize =
kTRUE;
913 vars[ivar] = ( !haveAllArrayData ?
916 checkNanInf(nanMessages, vars[ivar],
"Input", formula->
GetTitle());
920 for (
UInt_t itrgt=0; itrgt<ntgts; itrgt++) {
921 formula = fTargetFormulas[itrgt];
923 tgts[itrgt] = (ndata == 1 ?
926 checkNanInf(nanMessages, tgts[itrgt],
"Target", formula->
GetTitle());
930 for (
UInt_t itVis=0; itVis<nvis; itVis++) {
931 formula = fSpectatorFormulas[itVis];
933 vis[itVis] = (ndata == 1 ?
936 checkNanInf(nanMessages, vis[itVis],
"Spectator", formula->
GetTitle());
942 formula = fWeightFormula[cl];
945 weight *= (ndata == 1 ?
948 checkNanInf(nanMessages, weight,
"Weight", formula->
GetTitle());
958 if (cutVal<0.5)
continue;
967 if (contains_NaN_or_inf) {
968 Log() << kWARNING <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"NaN or +-inf in Event " << evtIdx <<
Endl;
969 if (sizeOfArrays>1) Log() << kWARNING <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
" rejected" <<
Endl;
979 event_v.push_back(
new Event(vars, tgts , vis, cl , weight));
// Summary of NaN/inf findings. Each map entry holds a message text (first)
// and how often it occurred (second); the count is printed only when it
// exceeds 1.
// NOTE(review): closing braces and the end of each per-message log line are
// not visible in this extract.

// Non-fatal case: messages collected in nanInfWarnings are listed as warnings
// and processing continues.
986 if (!nanInfWarnings.empty()) {
987 Log() << kWARNING <<
"Found events with NaN and/or +-inf values" <<
Endl;
988 for (
const auto &warning : nanInfWarnings) {
989 auto &log = Log() << kWARNING << warning.first;
990 if (warning.second > 1) log <<
" (" << warning.second <<
" times)";
993 Log() << kWARNING <<
"These NaN and/or +-infs were all removed by the specified cut, continuing." <<
Endl;
// Fatal case: messages collected in nanInfErrors are listed the same way and
// followed by a kFATAL message.
997 if (!nanInfErrors.empty()) {
998 Log() << kWARNING <<
"Found events with NaN and/or +-inf values (not removed by cut)" <<
Endl;
999 for (
const auto &error : nanInfErrors) {
1000 auto &log = Log() << kWARNING << error.first;
1001 if (error.second > 1) log <<
" (" << error.second <<
" times)";
1004 Log() << kFATAL <<
"How am I supposed to train a NaN or +-inf?!" <<
Endl;
1010 Log() << kHEADER <<
Form(
"[%s] : ",dsi.
GetName()) <<
"Number of events in input trees" <<
Endl;
1011 Log() << kDEBUG <<
"(after possible flattening of arrays):" <<
Endl;
1018 <<
" -- number of events : "
1019 << std::setw(5) << eventCounts[cl].nEvBeforeCut
1020 <<
" / sum of weights: " << std::setw(5) << eventCounts[cl].nWeEvBeforeCut <<
Endl;
1026 <<
" tree -- total number of entries: "
1030 if (fScaleWithPreselEff)
1032 <<
"\tPreselection: (will affect number of requested training and testing events)" <<
Endl;
1035 <<
"\tPreselection: (will NOT affect number of requested training and testing events)" <<
Endl;
1041 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
" "
1043 <<
" -- number of events passed: "
1044 << std::setw(5) << eventCounts[cl].nEvAfterCut
1045 <<
" / sum of weights: " << std::setw(5) << eventCounts[cl].nWeEvAfterCut <<
Endl;
1046 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
" "
1048 <<
" -- efficiency : "
1049 << std::setw(6) << eventCounts[cl].nWeEvAfterCut/eventCounts[cl].nWeEvBeforeCut <<
Endl;
1052 else Log() << kDEBUG
1053 <<
" No preselection cuts applied on event classes" <<
Endl;
1076 if (splitMode.
Contains(
"RANDOM" ) ) {
1080 if( ! unspecifiedEvents.empty() ) {
1081 Log() << kDEBUG <<
"randomly shuffling "
1082 << unspecifiedEvents.size()
1083 <<
" events of class " << cls
1084 <<
" which are not yet associated to testing or training" <<
Endl;
1085 std::shuffle(unspecifiedEvents.begin(), unspecifiedEvents.end(), rndm);
1091 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"SPLITTING ========" <<
Endl;
1093 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"---- class " << cls <<
Endl;
1094 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"check number of training/testing events, requested and available number of events and for class " << cls <<
Endl;
// Number of events of this class already assigned to training, to testing,
// or not yet assigned.
1101 Int_t availableTraining = eventVectorTraining.size();
1102 Int_t availableTesting = eventVectorTesting.size();
1103 Int_t availableUndefined = eventVectorUndefined.size();
// With ScaleWithPreselEff the requested counts are later multiplied by the
// class's cut scaling; otherwise the user is only informed how the requested
// numbers are interpreted.
// NOTE(review): the declaration of presel_scale and the `else` separating the
// two branches are not visible in this extract.
1106 if (fScaleWithPreselEff) {
1107 presel_scale = eventCounts[cls].cutScaling();
1108 if (presel_scale < 1)
1109 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
" you have opted for scaling the number of requested training/testing events\n to be scaled by the preselection efficiency"<<
Endl;
1112 if (eventCounts[cls].cutScaling() < 1)
1113 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
" you have opted for interpreting the requested number of training/testing events\n to be the number of events AFTER your preselection cuts" <<
Endl;
// A split fraction strictly between 0 and 1 overrides the requested counts:
// training = fraction * (all events of this class), testing request = 0.
1120 if(eventCounts[cls].TrainTestSplitRequested < 1.0 && eventCounts[cls].TrainTestSplitRequested > 0.0){
1121 eventCounts[cls].nTrainingEventsRequested =
Int_t(eventCounts[cls].TrainTestSplitRequested*(availableTraining+availableTesting+availableUndefined));
1122 eventCounts[cls].nTestingEventsRequested =
Int_t(0);
// Any other non-zero fraction is fatal.
// NOTE(review): the message states the range "(0, 1]", but a value of exactly
// 1.0 fails the `< 1.0` test above and therefore ends up in this kFATAL; the
// message and the condition disagree about whether 1.0 is accepted.
1124 else if(eventCounts[cls].TrainTestSplitRequested != 0.0) Log() << kFATAL <<
Form(
"The option TrainTestSplit_<class> has to be in range (0, 1] but is set to %f.",eventCounts[cls].TrainTestSplitRequested) <<
Endl;
// Effective requests after applying the preselection scale (truncated to
// Int_t).
1125 Int_t requestedTraining =
Int_t(eventCounts[cls].nTrainingEventsRequested * presel_scale);
1126 Int_t requestedTesting =
Int_t(eventCounts[cls].nTestingEventsRequested * presel_scale);
1128 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"events in training trees : " << availableTraining <<
Endl;
1129 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"events in testing trees : " << availableTesting <<
Endl;
1130 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"events in unspecified trees : " << availableUndefined <<
Endl;
1131 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"requested for training : " << requestedTraining <<
Endl;
1134 Log() <<
" ( " << eventCounts[cls].nTrainingEventsRequested
1135 <<
" * " << presel_scale <<
" preselection efficiency)" <<
Endl;
1138 Log() << kDEBUG <<
"requested for testing : " << requestedTesting;
1140 Log() <<
" ( " << eventCounts[cls].nTestingEventsRequested
1141 <<
" * " << presel_scale <<
" preselection efficiency)" <<
Endl;
// Decide how many events of this class go to training (useForTraining) and to
// testing (useForTesting). A request of 0 means "not specified".
// NOTE(review): several `else` keywords and closing braces between the
// branches are not visible in this extract.
1192 Int_t useForTesting(0),useForTraining(0);
1193 Int_t allAvailable(availableUndefined + availableTraining + availableTesting);
// Case 1: neither size requested.
1195 if( (requestedTraining == 0) && (requestedTesting == 0)){
// Enough unassigned events to level out the two samples: split everything in
// half (integer division).
1199 if ( availableUndefined >=
TMath::Abs(availableTraining - availableTesting) ) {
1201 useForTraining = useForTesting = allAvailable/2;
// Otherwise keep the pre-assigned events and give the unassigned ones to one
// of the two samples (the smaller one, judging by the visible comparison).
1204 useForTraining = availableTraining;
1205 useForTesting = availableTesting;
1206 if (availableTraining < availableTesting)
1207 useForTraining += availableUndefined;
1209 useForTesting += availableUndefined;
// The derived sizes become the effective requests.
1211 requestedTraining = useForTraining;
1212 requestedTesting = useForTesting;
// Case 2: only the training size was requested; testing gets the remainder.
1215 else if (requestedTesting == 0){
1217 useForTraining =
TMath::Max(requestedTraining,availableTraining);
1218 if (allAvailable < useForTraining) {
1219 Log() << kFATAL <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"More events requested for training ("
1220 << requestedTraining <<
") than available ("
1221 << allAvailable <<
")!" <<
Endl;
1223 useForTesting = allAvailable - useForTraining;
1224 requestedTesting = useForTesting;
// Case 3: only the testing size was requested; training gets the remainder.
1227 else if (requestedTraining == 0){
1228 useForTesting =
TMath::Max(requestedTesting,availableTesting);
1229 if (allAvailable < useForTesting) {
1230 Log() << kFATAL <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"More events requested for testing ("
1231 << requestedTesting <<
") than available ("
1232 << allAvailable <<
")!" <<
Endl;
1234 useForTraining= allAvailable - useForTesting;
1235 requestedTraining = useForTraining;
// Case 4 (both sizes requested): compute how many unassigned events each
// sample still needs; NFree is what remains (clamped at 0), and half of it is
// added to the training pool. Testing receives everything else.
1244 Int_t stillNeedForTraining =
TMath::Max(requestedTraining-availableTraining,0);
1245 Int_t stillNeedForTesting =
TMath::Max(requestedTesting-availableTesting,0);
1247 int NFree = availableUndefined - stillNeedForTraining - stillNeedForTesting;
1248 if (NFree <0) NFree = 0;
1249 useForTraining =
TMath::Max(requestedTraining,availableTraining) + NFree/2;
1250 useForTesting= allAvailable - useForTraining;
1253 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"determined event sample size to select training sample from="<<useForTraining<<
Endl;
1254 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"determined event sample size to select test sample from="<<useForTesting<<
Endl;
1259 if( splitMode ==
"ALTERNATE" ){
1260 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"split 'ALTERNATE'" <<
Endl;
1261 Int_t nTraining = availableTraining;
1262 for( EventVector::iterator it = eventVectorUndefined.begin(), itEnd = eventVectorUndefined.end(); it != itEnd; ){
1264 if( nTraining <= requestedTraining ){
1265 eventVectorTraining.insert( eventVectorTraining.end(), (*it) );
1269 eventVectorTesting.insert( eventVectorTesting.end(), (*it) );
1274 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"split '" << splitMode <<
"'" <<
Endl;
1277 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"availableundefined : " << availableUndefined <<
Endl;
1278 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"useForTraining : " << useForTraining <<
Endl;
1279 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"useForTesting : " << useForTesting <<
Endl;
1280 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"availableTraining : " << availableTraining <<
Endl;
1281 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"availableTesting : " << availableTesting <<
Endl;
// Sanity check: the unassigned events must cover the training shortfall, the
// testing shortfall, and their sum; otherwise abort with kFATAL.
1283 if( availableUndefined<(useForTraining-availableTraining) ||
1284 availableUndefined<(useForTesting -availableTesting ) ||
1285 availableUndefined<(useForTraining+useForTesting-availableTraining-availableTesting ) ){
1286 Log() << kFATAL <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"More events requested than available!" <<
Endl;
// Top up the training sample from the front of the unassigned events and
// remove the moved entries so they cannot be reused for testing.
1290 if (useForTraining>availableTraining){
1291 eventVectorTraining.insert( eventVectorTraining.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTraining- availableTraining );
1292 eventVectorUndefined.erase( eventVectorUndefined.begin(), eventVectorUndefined.begin() + useForTraining- availableTraining);
// Top up the testing sample from what is left at the front.
1294 if (useForTesting>availableTesting){
1295 eventVectorTesting.insert( eventVectorTesting.end() , eventVectorUndefined.begin(), eventVectorUndefined.begin()+ useForTesting- availableTesting );
// Drop the remaining pointers from the unassigned list.
// NOTE(review): clear() only removes the pointers; whether unassigned events
// that were not moved are deleted elsewhere is not visible in this extract.
1298 eventVectorUndefined.clear();
// Random split mode: if a sample holds more events than requested, delete a
// random selection of the surplus.
// NOTE(review): the lines that fill the index vectors (presumably with
// 0..size-1) and several closing braces are not visible in this extract.
1301 if (splitMode.
Contains(
"RANDOM" )){
1302 UInt_t sizeTraining = eventVectorTraining.size();
1303 if( sizeTraining >
UInt_t(requestedTraining) ){
1304 std::vector<UInt_t> indicesTraining( sizeTraining );
1308 std::shuffle(indicesTraining.begin(), indicesTraining.end(), rndm);
// Keep only the first (sizeTraining - requestedTraining) shuffled indices:
// these identify the events to drop.
1310 indicesTraining.erase( indicesTraining.begin()+sizeTraining-
UInt_t(requestedTraining), indicesTraining.end() );
// Delete each selected event and leave a NULL marker in its slot ...
1312 for( std::vector<UInt_t>::iterator it = indicesTraining.begin(), itEnd = indicesTraining.end(); it != itEnd; ++it ){
1313 delete eventVectorTraining.at( (*it) );
1314 eventVectorTraining.at( (*it) ) = NULL;
// ... then compact the vector by removing all NULL markers.
1317 eventVectorTraining.erase( std::remove( eventVectorTraining.begin(), eventVectorTraining.end(), (
void*)NULL ), eventVectorTraining.end() );
// Same procedure for the testing sample.
1320 UInt_t sizeTesting = eventVectorTesting.size();
1321 if( sizeTesting >
UInt_t(requestedTesting) ){
1322 std::vector<UInt_t> indicesTesting( sizeTesting );
1326 std::shuffle(indicesTesting.begin(), indicesTesting.end(), rndm);
1328 indicesTesting.erase( indicesTesting.begin()+sizeTesting-
UInt_t(requestedTesting), indicesTesting.end() );
1330 for( std::vector<UInt_t>::iterator it = indicesTesting.begin(), itEnd = indicesTesting.end(); it != itEnd; ++it ){
1331 delete eventVectorTesting.at( (*it) );
1332 eventVectorTesting.at( (*it) ) = NULL;
1335 eventVectorTesting.erase( std::remove( eventVectorTesting.begin(), eventVectorTesting.end(), (
void*)NULL ), eventVectorTesting.end() );
// Non-random trimming of the training sample: warn when fewer events than
// requested are present; when more are present, delete and erase everything
// past the first requestedTraining entries.
1339 if( eventVectorTraining.size() <
UInt_t(requestedTraining) )
1340 Log() << kWARNING <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"DataSetFactory/requested number of training samples larger than size of eventVectorTraining.\n"
1341 <<
"There is probably an issue. Please contact the TMVA developers." <<
Endl;
1342 else if (eventVectorTraining.size() >
UInt_t(requestedTraining)) {
1343 std::for_each( eventVectorTraining.begin()+requestedTraining, eventVectorTraining.end(), DeleteFunctor<Event>() );
1344 eventVectorTraining.erase(eventVectorTraining.begin()+requestedTraining,eventVectorTraining.end());
// Same trimming for the testing sample.
1346 if( eventVectorTesting.size() <
UInt_t(requestedTesting) )
1347 Log() << kWARNING <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"DataSetFactory/requested number of testing samples larger than size of eventVectorTesting.\n"
1348 <<
"There is probably an issue. Please contact the TMVA developers." <<
Endl;
1349 else if ( eventVectorTesting.size() >
UInt_t(requestedTesting) ) {
1350 std::for_each( eventVectorTesting.begin()+requestedTesting, eventVectorTesting.end(), DeleteFunctor<Event>() );
1351 eventVectorTesting.erase(eventVectorTesting.begin()+requestedTesting,eventVectorTesting.end());
1358 Int_t trainingSize = 0;
1359 Int_t testingSize = 0;
1373 trainingEventVector->reserve( trainingSize );
1374 testingEventVector->reserve( testingSize );
1380 Log() << kDEBUG <<
" MIXING ============= " <<
Endl;
1382 if( mixMode ==
"ALTERNATE" ){
1387 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"Training sample: You are trying to mix events in alternate mode although the classes have different event numbers. This works but the alternation stops at the last event of the smaller class."<<
Endl;
1390 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"Testing sample: You are trying to mix events in alternate mode although the classes have different event numbers. This works but the alternation stops at the last event of the smaller class."<<
Endl;
1393 typedef EventVector::iterator EvtVecIt;
1394 EvtVecIt itEvent, itEventEnd;
1397 Log() << kDEBUG <<
"insert class 0 into training and test vector" <<
Endl;
1399 testingEventVector->insert( testingEventVector->end(), tmpEventVector[
Types::kTesting].at(0).begin(), tmpEventVector[
Types::kTesting].at(0).end() );
1404 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"insert class " << cls <<
Endl;
1406 itTarget = trainingEventVector->begin() - 1;
1408 for( itEvent = tmpEventVector[
Types::kTraining].at(cls).begin(), itEventEnd = tmpEventVector[
Types::kTraining].at(cls).end(); itEvent != itEventEnd; ++itEvent ){
1410 if( (trainingEventVector->end() - itTarget) <
Int_t(cls+1) ) {
1411 itTarget = trainingEventVector->end();
1412 trainingEventVector->insert( itTarget, itEvent, itEventEnd );
1416 trainingEventVector->insert( itTarget, (*itEvent) );
1420 itTarget = testingEventVector->begin() - 1;
1422 for( itEvent = tmpEventVector[
Types::kTesting].at(cls).begin(), itEventEnd = tmpEventVector[
Types::kTesting].at(cls).end(); itEvent != itEventEnd; ++itEvent ){
1424 if( ( testingEventVector->end() - itTarget ) <
Int_t(cls+1) ) {
1425 itTarget = testingEventVector->end();
1426 testingEventVector->insert( itTarget, itEvent, itEventEnd );
1430 testingEventVector->insert( itTarget, (*itEvent) );
1436 trainingEventVector->insert( trainingEventVector->end(), tmpEventVector[
Types::kTraining].at(cls).begin(), tmpEventVector[
Types::kTraining].at(cls).end() );
1437 testingEventVector->insert ( testingEventVector->end(), tmpEventVector[
Types::kTesting].at(cls).begin(), tmpEventVector[
Types::kTesting].at(cls).end() );
1446 if (mixMode ==
"RANDOM") {
1447 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"shuffling events"<<
Endl;
1449 std::shuffle(trainingEventVector->begin(), trainingEventVector->end(), rndm);
1450 std::shuffle(testingEventVector->begin(), testingEventVector->end(), rndm);
1453 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"trainingEventVector " << trainingEventVector->size() <<
Endl;
1454 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"testingEventVector " << testingEventVector->size() <<
Endl;
1466 Log() << kFATAL <<
"Dataset " << std::string(dsi.
GetName()) <<
" does not have any training events, I better stop here and let you fix that one first " <<
Endl;
1470 Log() << kERROR <<
"Dataset " << std::string(dsi.
GetName()) <<
" does not have any testing events, guess that will cause problems later..but for now, I continue " <<
Endl;
1473 delete trainingEventVector;
1474 delete testingEventVector;
1504 Double_t trainingSumSignalWeights = 0;
1505 Double_t trainingSumBackgrWeights = 0;
1506 Double_t testingSumSignalWeights = 0;
1507 Double_t testingSumBackgrWeights = 0;
1512 trainingSizePerClass.at(cls) = tmpEventVector[
Types::kTraining].at(cls).size();
1513 testingSizePerClass.at(cls) = tmpEventVector[
Types::kTesting].at(cls).size();
1526 trainingSumWeightsPerClass.at(cls) =
1531 testingSumWeightsPerClass.at(cls) =
1537 trainingSumSignalWeights += trainingSumWeightsPerClass.at(cls);
1538 testingSumSignalWeights += testingSumWeightsPerClass.at(cls);
1540 trainingSumBackgrWeights += trainingSumWeightsPerClass.at(cls);
1541 testingSumBackgrWeights += testingSumWeightsPerClass.at(cls);
1561 if (normMode ==
"NONE") {
1562 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"No weight renormalisation applied: use original global and event weights" <<
Endl;
1568 else if (normMode ==
"NUMEVENTS") {
1570 <<
"\tWeight renormalisation mode: \"NumEvents\": renormalises all event classes " <<
Endl;
1572 <<
" such that the effective (weighted) number of events in each class equals the respective " <<
Endl;
1574 <<
" number of events (entries) that you demanded in PrepareTrainingAndTestTree(\"\",\"nTrain_Signal=.. )" <<
Endl;
1576 <<
" ... i.e. such that Sum[i=1..N_j]{w_i} = N_j, j=0,1,2..." <<
Endl;
1578 <<
" ... (note that N_j is the sum of TRAINING events (nTrain_j...with j=Signal,Background.." <<
Endl;
1580 <<
" ..... Testing events are not renormalised nor included in the renormalisation factor! )"<<
Endl;
1586 renormFactor.at(cls) = ((
Float_t)trainingSizePerClass.at(cls) )/
1587 (trainingSumWeightsPerClass.at(cls)) ;
1590 else if (normMode ==
"EQUALNUMEVENTS") {
1596 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"Weight renormalisation mode: \"EqualNumEvents\": renormalises all event classes ..." <<
Endl;
1597 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
" such that the effective (weighted) number of events in each class is the same " <<
Endl;
1598 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
" (and equals the number of events (entries) given for class=0 )" <<
Endl;
1599 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"... i.e. such that Sum[i=1..N_j]{w_i} = N_classA, j=classA, classB, ..." <<
Endl;
1600 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
"... (note that N_j is the sum of TRAINING events" <<
Endl;
1601 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) <<
" ..... Testing events are not renormalised nor included in the renormalisation factor!)" <<
Endl;
1604 UInt_t referenceClass = 0;
1606 renormFactor.at(cls) =
Float_t(trainingSizePerClass.at(referenceClass))/
1607 (trainingSumWeightsPerClass.at(cls));
1611 Log() << kFATAL <<
Form(
"Dataset[%s] : ",dsi.
GetName())<<
"<PrepareForTrainingAndTesting> Unknown NormMode: " << normMode <<
Endl;
1619 <<
"--> Rescale " << setiosflags(ios::left) << std::setw(maxL)
1621 for (EventVector::iterator it = tmpEventVector[
Types::kTraining].at(cls).begin(),
1622 itEnd = tmpEventVector[
Types::kTraining].at(cls).end(); it != itEnd; ++it){
1623 (*it)->SetWeight ((*it)->GetWeight() * renormFactor.at(cls));
1634 <<
"Number of training and testing events" <<
Endl;
1635 Log() << kDEBUG <<
"\tafter rescaling:" <<
Endl;
1637 <<
"---------------------------------------------------------------------------" <<
Endl;
1639 trainingSumSignalWeights = 0;
1640 trainingSumBackgrWeights = 0;
1641 testingSumSignalWeights = 0;
1642 testingSumBackgrWeights = 0;
1645 trainingSumWeightsPerClass.at(cls) =
1650 testingSumWeightsPerClass.at(cls) =
1656 trainingSumSignalWeights += trainingSumWeightsPerClass.at(cls);
1657 testingSumSignalWeights += testingSumWeightsPerClass.at(cls);
1659 trainingSumBackgrWeights += trainingSumWeightsPerClass.at(cls);
1660 testingSumBackgrWeights += testingSumWeightsPerClass.at(cls);
1666 << setiosflags(ios::left) << std::setw(maxL)
1668 <<
"training events : " << trainingSizePerClass.at(cls) <<
Endl;
1669 Log() << kDEBUG <<
"\t(sum of weights: " << trainingSumWeightsPerClass.at(cls) <<
")"
1670 <<
" - requested were " << eventCounts[cls].nTrainingEventsRequested <<
" events" <<
Endl;
1672 << setiosflags(ios::left) << std::setw(maxL)
1674 <<
"testing events : " << testingSizePerClass.at(cls) <<
Endl;
1675 Log() << kDEBUG <<
"\t(sum of weights: " << testingSumWeightsPerClass.at(cls) <<
")"
1676 <<
" - requested were " << eventCounts[cls].nTestingEventsRequested <<
" events" <<
Endl;
1678 << setiosflags(ios::left) << std::setw(maxL)
1680 <<
"training and testing events: "
1681 << (trainingSizePerClass.at(cls)+testingSizePerClass.at(cls)) <<
Endl;
1682 Log() << kDEBUG <<
"\t(sum of weights: "
1683 << (trainingSumWeightsPerClass.at(cls)+testingSumWeightsPerClass.at(cls)) <<
")" <<
Endl;
1684 if(eventCounts[cls].nEvAfterCut<eventCounts[cls].nEvBeforeCut) {
1685 Log() << kINFO <<
Form(
"Dataset[%s] : ",dsi.
GetName()) << setiosflags(ios::left) << std::setw(maxL)
1687 <<
"due to the preselection a scaling factor has been applied to the numbers of requested events: "
1688 << eventCounts[cls].cutScaling() <<
Endl;
1691 Log() << kINFO <<
Endl;
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
TMatrixT< Double_t > TMatrixD
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
A specialized string object used for TTree selections.
virtual TFile * GetFile() const
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
virtual bool IsOnTerminalBranch() const
TBranch * GetBranch() const
const TCut & GetCut() const
void SetNumber(const UInt_t index)
const TString & GetWeight() const
void SetConfigDescription(const char *d)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
void SetConfigName(const char *n)
virtual void ParseOptions()
options parser
void CheckForUnusedOptions() const
checks for unused options in option string
~DataSetFactory()
destructor
DataSet * BuildInitialDataSet(DataSetInfo &, TMVA::DataInputHandler &)
if no entries, then create a DataSet with one Event which uses dynamic variables (pointers to variabl...
DataSetFactory()
constructor
std::map< Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType
void ChangeToNewTree(TreeInfo &, const DataSetInfo &)
While the data gets copied into the local training and testing trees, the input tree can change (for ...
void BuildEventVector(DataSetInfo &dsi, DataInputHandler &dataInput, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts)
build empty event vectors; distributes events between kTraining/kTesting/kMaxTreeType
DataSet * CreateDataSet(DataSetInfo &, DataInputHandler &)
steering the creation of a new dataset
DataSet * MixEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts, const TString &splitMode, const TString &mixMode, const TString &normMode, UInt_t splitSeed)
Select and distribute unassigned events to kTraining and kTesting.
std::vector< int > NumberPerClass
std::vector< EventVector > EventVectorOfClasses
void InitOptions(DataSetInfo &dsi, EvtStatsPerClass &eventsmap, TString &normMode, UInt_t &splitSeed, TString &splitMode, TString &mixMode)
the dataset splitting
void CalcMinMax(DataSet *, DataSetInfo &dsi)
compute minimum and maximum values of the dataset variables
std::vector< Double_t > ValuePerClass
DataSet * BuildDynamicDataSet(DataSetInfo &)
std::vector< EventStats > EvtStatsPerClass
Bool_t CheckTTreeFormula(TTreeFormula *ttf, const TString &expression, Bool_t &hasDollar)
checks a TTreeFormula for problems
void RenormEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, const EvtStatsPerClass &eventCounts, const TString &normMode)
renormalisation of the TRAINING event weights
TMatrixD * CalcCorrelationMatrix(DataSet *, const UInt_t classNumber)
computes correlation matrix for variables "theVars" in tree; "theType" defines the required event "ty...
TMatrixD * CalcCovarianceMatrix(DataSet *, const UInt_t classNumber)
compute covariance matrix
std::vector< Event * > EventVector
Class that contains all the data information.
std::vector< VariableInfo > & GetVariableInfos()
UInt_t GetNVariables() const
UInt_t GetNSpectators(bool all=kTRUE) const
Int_t GetVarArraySize(const TString &expression) const
ClassInfo * AddClass(const TString &className)
virtual const char * GetName() const
Returns name of object.
Bool_t IsVariableFromArray(Int_t i) const
std::vector< VariableInfo > & GetSpectatorInfos()
void SetNormalization(const TString &norm)
UInt_t GetNClasses() const
const TString & GetSplitOptions() const
UInt_t GetNTargets() const
void SetTestingSumSignalWeights(Double_t testingSumSignalWeights)
UInt_t GetSignalClassIndex()
void SetTrainingSumSignalWeights(Double_t trainingSumSignalWeights)
ClassInfo * GetClassInfo(Int_t clNum) const
void SetTestingSumBackgrWeights(Double_t testingSumBackgrWeights)
Int_t GetClassNameMaxLength() const
void PrintCorrelationMatrix(const TString &className)
calculates the correlation matrices for signal and background, prints them to standard output,...
VariableInfo & GetVariableInfo(Int_t i)
void SetTrainingSumBackgrWeights(Double_t trainingSumBackgrWeights)
VariableInfo & GetTargetInfo(Int_t i)
VariableInfo & GetSpectatorInfo(Int_t i)
void SetCorrelationMatrix(const TString &className, TMatrixD *matrix)
Class that contains all the data information.
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
void SetEventCollection(std::vector< Event * > *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
Long64_t GetNTestEvents() const
const Event * GetEvent() const
returns event without transformations
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Long64_t GetNTrainingEvents() const
UInt_t GetNSpectators() const
access the number of spectators through the datasetinfo
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
void SetCurrentType(Types::ETreeType type) const
void SetCurrentEvent(Long64_t ievt) const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Float_t GetSpectator(UInt_t ivar) const
return spectator content
void SetSpectatorTypes(const std::vector< char > &types)
Float_t GetTarget(UInt_t itgt) const
ostringstream derivative to redirect and format output
Types::ETreeType GetTreeType() const
const TString & GetClassName() const
Double_t GetWeight() const
@ kMaxTreeType
also used as temporary storage for trees not yet assigned for testing;training...
const TString & GetExpression() const
const TString & GetInternalName() const
const char * GetName() const override
Returns name of object.
const char * GetTitle() const override
Returns title of object.
virtual const char * ClassName() const
Returns name of class to which the object belongs.
const char * Data() const
void ToUpper()
Change string to upper case.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
A TTree represents a columnar dataset.
virtual void SetBranchStatus(const char *bname, bool status=true, UInt_t *found=nullptr)
Set branch status to Process or DoNotProcess.
virtual Int_t GetEntry(Long64_t entry, Int_t getall=0)
Read all branches of entry and return total number of bytes read.
TFile * GetCurrentFile() const
Return pointer to the current file.
TDirectory * GetDirectory() const
virtual Long64_t GetEntries() const
virtual TTree * GetTree() const
virtual Long64_t LoadTree(Long64_t entry)
Set current entry.
virtual void ResetBranchAddresses()
Tell all of our branches to drop their current objects and allocate new ones.
create variable transformations
Int_t LargestCommonDivider(Int_t a, Int_t b)
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Int_t Finite(Double_t x)
Check if it is finite with a mask in order to be consistent in presence of fast math.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.