73using std::setiosflags, std::ios;
96 fScaleWithPreselEff(0),
109 std::vector<TTreeFormula*>::const_iterator
formIt;
129 if (
ds->GetNEvents() > 1 && fComputeCorrelations ) {
133 for (
UInt_t cl = 0; cl<
dsi.GetNClasses(); cl++) {
134 const TString className =
dsi.GetClassInfo(cl)->GetName();
135 dsi.SetCorrelationMatrix( className, CalcCorrelationMatrix(
ds, cl ) );
137 dsi.PrintCorrelationMatrix(className);
141 Log() << kHEADER <<
Form(
"[%s] : ",
dsi.GetName()) <<
" " <<
Endl <<
Endl;
151 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"Build DataSet consisting of one Event with dynamically changing variables" <<
Endl;
156 if(
dsi.GetNClasses()==0){
157 dsi.AddClass(
"data" );
158 dsi.GetClassInfo(
"data" )->SetNumber(0);
161 std::vector<Float_t*>*
evdyn =
new std::vector<Float_t*>(0);
163 std::vector<VariableInfo>&
varinfos =
dsi.GetVariableInfos();
166 Log() << kFATAL <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"Dynamic data set cannot be built, since no variable informations are present. Apparently no variables have been set. This should not happen, please contact the TMVA authors." <<
Endl;
169 for (;it!=
itEnd;++it) {
172 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"The link to the external variable is NULL while I am trying to build a dynamic data set. In this case fTmpEvent from MethodBase HAS TO BE USED in the method to get useful values in variables." <<
Endl;
191 ds->SetCurrentEvent( 0 );
205 if (
dataInput.GetEntries()==0)
return BuildDynamicDataSet(
dsi );
212 dsi.AddClass( (*it) );
233 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"Collected:" <<
Endl;
234 for (
UInt_t cl = 0; cl <
dsi.GetNClasses(); cl++) {
235 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" "
236 << setiosflags(ios::left) << std::setw(
maxL) <<
dsi.GetClassInfo(cl)->GetName()
237 <<
" training entries: " <<
ds->GetNClassEvents( 0, cl ) <<
Endl;
238 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" "
239 << setiosflags(ios::left) << std::setw(
maxL) <<
dsi.GetClassInfo(cl)->GetName()
240 <<
" testing entries: " <<
ds->GetNClassEvents( 1, cl ) <<
Endl;
242 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" " <<
Endl;
257 if(
ttf->GetNdim() <= 0 )
258 Log() << kFATAL <<
"Expression " << expression.
Data()
259 <<
" could not be resolved to a valid formula. " <<
Endl;
260 if(
ttf->GetNdata() == 0 ){
261 Log() << kWARNING <<
"Expression: " << expression.
Data()
262 <<
" does not provide data for this event. "
263 <<
"This event is not taken into account. --> please check if you use as a variable "
264 <<
"an entry of an array which is not filled for some events "
265 <<
"(e.g. arr[4] when arr has only 3 elements)." <<
Endl;
266 Log() << kWARNING <<
"If you want to take the event into account you can do something like: "
267 <<
"\"Alt$(arr[4],0)\" where in cases where arr doesn't have a 4th element, "
268 <<
" 0 is taken as an alternative." <<
Endl;
275 for (
int i = 0,
iEnd =
ttf->GetNcodes (); i <
iEnd; ++i)
278 if (!
leaf->IsOnTerminalBranch())
298 tr->ResetBranchAddresses();
303 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" create input formulas for tree " <<
tr->GetName() <<
Endl;
306 fInputFormulas.clear();
308 fInputTableFormulas.clear();
313 for (
UInt_t i = 0; i <
dsi.GetNVariables(); i++) {
316 if (!
dsi.IsVariableFromArray(i) ) {
318 dsi.GetVariableInfo(i).GetExpression().Data(),
tr);
319 CheckTTreeFormula(
ttf,
dsi.GetVariableInfo(i).GetExpression(),
hasDollar);
320 fInputFormulas.emplace_back(
ttf);
321 fInputTableFormulas.emplace_back(std::make_pair(
ttf, (
Int_t) 0));
328 dsi.GetVariableInfo(i).GetExpression().Data(),
tr);
329 CheckTTreeFormula(
ttf,
dsi.GetVariableInfo(i).GetExpression(),
hasDollar);
330 fInputFormulas.push_back(
ttf);
332 arraySize =
dsi.GetVarArraySize(
dsi.GetVariableInfo(i).GetExpression());
336 Log() << kINFO <<
"Using variable " <<
dsi.GetVariableInfo(i).GetInternalName() <<
337 " from array expression " <<
dsi.GetVariableInfo(i).GetExpression() <<
" of size " << arraySize <<
Endl;
344 Log() << kDEBUG <<
"Using Last variable from array : " <<
dsi.GetVariableInfo(i).GetInternalName() <<
Endl;
353 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"transform regression targets" <<
Endl;
355 fTargetFormulas.clear();
356 for (
UInt_t i=0; i<
dsi.GetNTargets(); i++) {
358 dsi.GetTargetInfo(i).GetExpression().Data(),
tr );
359 CheckTTreeFormula(
ttf,
dsi.GetTargetInfo(i).GetExpression(),
hasDollar );
360 fTargetFormulas.push_back(
ttf );
366 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"transform spectator variables" <<
Endl;
368 fSpectatorFormulas.clear();
369 for (
UInt_t i=0; i<
dsi.GetNSpectators(); i++) {
371 dsi.GetSpectatorInfo(i).GetExpression().Data(),
tr );
372 CheckTTreeFormula(
ttf,
dsi.GetSpectatorInfo(i).GetExpression(),
hasDollar );
373 fSpectatorFormulas.push_back(
ttf );
379 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"transform cuts" <<
Endl;
381 fCutFormulas.clear();
390 Log() << kWARNING <<
"Please check class \"" <<
dsi.GetClassInfo(
clIdx)->GetName()
391 <<
"\" cut \"" <<
dsi.GetClassInfo(
clIdx)->GetCut() <<
Endl;
394 fCutFormulas.push_back(
ttf );
400 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"transform weights" <<
Endl;
402 fWeightFormula.clear();
406 if (
dsi.GetClassInfo(
clIdx)->GetName() !=
tinfo.GetClassName() ) {
407 fWeightFormula.push_back( 0 );
416 Log() << kWARNING <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"Please check class \"" <<
dsi.GetClassInfo(
clIdx)->GetName()
417 <<
"\" weight \"" <<
dsi.GetClassInfo(
clIdx)->GetWeight() <<
Endl;
423 fWeightFormula.push_back(
ttf );
428 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"enable branches" <<
Endl;
432 tr->SetBranchStatus(
"*",0);
433 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"enable branches: input variables" <<
Endl;
438 tr->SetBranchStatus(
ttf->GetLeaf(
bi)->GetBranch()->GetName(), 1 );
442 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"enable branches: targets" <<
Endl;
446 tr->SetBranchStatus(
ttf->GetLeaf(
bi)->GetBranch()->GetName(), 1 );
449 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"enable branches: spectators" <<
Endl;
453 tr->SetBranchStatus(
ttf->GetLeaf(
bi)->GetBranch()->GetName(), 1 );
456 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"enable branches: cuts" <<
Endl;
461 tr->SetBranchStatus(
ttf->GetLeaf(
bi)->GetBranch()->GetName(), 1 );
464 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"enable branches: weights" <<
Endl;
469 tr->SetBranchStatus(
ttf->GetLeaf(
bi)->GetBranch()->GetName(), 1 );
472 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"tree initialized" <<
Endl;
481 const UInt_t nvar =
ds->GetNVariables();
498 for (
Int_t i=0; i<
ds->GetNEvents(); i++) {
521 Log() << kWARNING <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"Variable " <<
dsi.GetVariableInfo(
ivar).GetExpression().Data() <<
" is constant. Please remove the variable." <<
Endl;
527 Log() << kFATAL <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"Target " <<
dsi.GetTargetInfo(
ivar).GetExpression().Data() <<
" is constant. Please remove the variable." <<
Endl;
562 Log() << kWARNING <<
Form(
"Dataset[%s] : ",
DataSetInfo().GetName())<<
"<GetCorrelationMatrix> Zero variances for variables "
563 <<
"(" <<
ivar <<
", " <<
jvar <<
") = " <<
d
596 for (
Int_t i=0; i<
ds->GetNEvents(); i++) {
647 splitSpecs.SetConfigDescription(
"Configuration options given in the \"PrepareForTrainingAndTesting\" call; these options define the creation of the data sets used for training and expert validation by TMVA" );
651 "Method of picking training and testing events (default: random)" );
658 "Method of mixing events of different classes into one dataset (default: SameAsSplitMode)" );
666 "Seed for random event shuffling" );
670 "Overall renormalisation of event-by-event weights used in the training (NumEvents: average weight of 1 per event, independently for signal and background; EqualNumEvents: average weight of 1 per event for signal, and sum of weights for background equal to sum of weights for signal)" );
675 splitSpecs.DeclareOptionRef(fScaleWithPreselEff=
kFALSE,
"ScaleWithPreselEff",
"Scale the number of requested events by the eff. of the preselection cuts (or not)" );
680 for (
UInt_t cl = 0; cl <
dsi.GetNClasses(); cl++) {
691 splitSpecs.DeclareOptionRef( fVerbose,
"V",
"Verbosity (default: true)" );
693 splitSpecs.DeclareOptionRef( fVerboseLevel=
TString(
"Info"),
"VerboseLevel",
"VerboseLevel (Debug/Verbose/Info)" );
698 fCorrelations =
kTRUE;
699 splitSpecs.DeclareOptionRef(fCorrelations,
"Correlations",
"Boolean to show correlation output (Default: true)");
700 fComputeCorrelations =
kTRUE;
701 splitSpecs.DeclareOptionRef(fComputeCorrelations,
"CalcCorrelations",
"Compute correlations and also some variable statistics, e.g. min/max (Default: true )");
707 if (Verbose()) fLogger->SetMinType( kVERBOSE );
708 if (fVerboseLevel.CompareTo(
"Debug") ==0) fLogger->SetMinType( kDEBUG );
709 if (fVerboseLevel.CompareTo(
"Verbose") ==0) fLogger->SetMinType( kVERBOSE );
710 if (fVerboseLevel.CompareTo(
"Info") ==0) fLogger->SetMinType( kINFO );
719 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"DataSet splitmode="<<
splitMode
740 const UInt_t nvars =
dsi.GetNVariables();
770 <<
"\tWeight expression for class \'" <<
dsi.GetClassInfo(cl)->GetName() <<
"\': \""
771 <<
dsi.GetClassInfo(cl)->GetWeight() <<
"\"" <<
Endl;
780 std::vector<Float_t> vars(nvars);
782 std::vector<Float_t> vis(
nvis);
785 Log() << kINFO <<
"Building event vectors for type " <<
currentInfo.GetTreeType() <<
" " <<
currentInfo.GetClassName() <<
Endl;
828 if (
dsi.IsVariableFromArray(
ivar))
continue;
834 if (
ndata == 1)
continue;
843 Log() << kERROR <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"ERROR while preparing training and testing trees:" <<
Endl;
844 Log() <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
" multiple array-type expressions of different length were encountered" <<
Endl;
845 Log() <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
" location of error: event " <<
evtIdx
847 <<
" of file " <<
currentInfo.GetTree()->GetCurrentFile()->GetName() <<
Endl;
848 Log() <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
" expression " <<
inputFormula->GetTitle() <<
" has "
849 <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
ndata <<
" entries, while" <<
Endl;
850 Log() <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
" expression " << fInputTableFormulas[
prevArrExpr].first->GetTitle() <<
" has "
851 <<
Form(
"Dataset[%s] : ",
dsi.GetName())<< fInputTableFormulas[
prevArrExpr].first->GetNdata() <<
" entries" <<
Endl;
852 Log() << kFATAL <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"Need to abort" <<
Endl;
874 formula = fCutFormulas[cl];
896 Int_t arraySize =
dsi.GetVarArraySize(
dsi.GetVariableInfo(
ivar).GetExpression());
897 if (
ndata < arraySize) {
898 Log() << kFATAL <<
"Size of array " <<
dsi.GetVariableInfo(
ivar).GetExpression()
899 <<
" in the current tree " <<
currentInfo.GetTree()->GetName() <<
" for the event " <<
evtIdx
900 <<
" is " <<
ndata <<
" instead of " << arraySize <<
Endl;
902 Log() << kWARNING <<
"Size of array " <<
dsi.GetVariableInfo(
ivar).GetExpression()
903 <<
" in the current tree " <<
currentInfo.GetTree()->GetName() <<
" for the event "
905 Log() << kWARNING <<
"Some data will then be ignored. This WARNING is printed only once, "
906 <<
" check in case for the other variables and events " <<
Endl;
921 formula = fTargetFormulas[
itrgt];
931 formula = fSpectatorFormulas[
itVis];
942 formula = fWeightFormula[cl];
945 weight *= (
ndata == 1 ?
968 Log() << kWARNING <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"NaN or +-inf in Event " <<
evtIdx <<
Endl;
987 Log() << kWARNING <<
"Found events with NaN and/or +-inf values" <<
Endl;
989 auto &log = Log() << kWARNING << warning.first;
990 if (warning.second > 1) log <<
" (" << warning.second <<
" times)";
993 Log() << kWARNING <<
"These NaN and/or +-infs were all removed by the specified cut, continuing." <<
Endl;
998 Log() << kWARNING <<
"Found events with NaN and/or +-inf values (not removed by cut)" <<
Endl;
1000 auto &log = Log() << kWARNING << error.first;
1001 if (error.second > 1) log <<
" (" << error.second <<
" times)";
1004 Log() << kFATAL <<
"How am I supposed to train a NaN or +-inf?!" <<
Endl;
1010 Log() << kHEADER <<
Form(
"[%s] : ",
dsi.GetName()) <<
"Number of events in input trees" <<
Endl;
1011 Log() << kDEBUG <<
"(after possible flattening of arrays):" <<
Endl;
1014 for (
UInt_t cl = 0; cl <
dsi.GetNClasses(); cl++) {
1017 << setiosflags(ios::left) << std::setw(
maxL) <<
dsi.GetClassInfo(cl)->GetName()
1018 <<
" -- number of events : "
1020 <<
" / sum of weights: " << std::setw(5) <<
eventCounts[cl].nWeEvBeforeCut <<
Endl;
1023 for (
UInt_t cl = 0; cl <
dsi.GetNClasses(); cl++) {
1025 <<
" " << std::setw(
maxL) <<
dsi.GetClassInfo(cl)->GetName()
1026 <<
" tree -- total number of entries: "
1027 << std::setw(5) <<
dataInput.GetEntries(
dsi.GetClassInfo(cl)->GetName()) <<
Endl;
1030 if (fScaleWithPreselEff)
1032 <<
"\tPreselection: (will affect number of requested training and testing events)" <<
Endl;
1035 <<
"\tPreselection: (will NOT affect number of requested training and testing events)" <<
Endl;
1037 if (
dsi.HasCuts()) {
1038 for (
UInt_t cl = 0; cl<
dsi.GetNClasses(); cl++) {
1039 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" " << setiosflags(ios::left) << std::setw(
maxL) <<
dsi.GetClassInfo(cl)->GetName()
1040 <<
" requirement: \"" <<
dsi.GetClassInfo(cl)->GetCut() <<
"\"" <<
Endl;
1041 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" "
1042 << setiosflags(ios::left) << std::setw(
maxL) <<
dsi.GetClassInfo(cl)->GetName()
1043 <<
" -- number of events passed: "
1045 <<
" / sum of weights: " << std::setw(5) <<
eventCounts[cl].nWeEvAfterCut <<
Endl;
1046 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" "
1047 << setiosflags(ios::left) << std::setw(
maxL) <<
dsi.GetClassInfo(cl)->GetName()
1048 <<
" -- efficiency : "
1052 else Log() << kDEBUG
1053 <<
" No preselection cuts applied on event classes" <<
Endl;
1081 Log() << kDEBUG <<
"randomly shuffling "
1083 <<
" events of class " <<
cls
1084 <<
" which are not yet associated to testing or training" <<
Endl;
1091 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"SPLITTING ========" <<
Endl;
1093 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"---- class " <<
cls <<
Endl;
1094 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"check number of training/testing events, requested and available number of events and for class " <<
cls <<
Endl;
1106 if (fScaleWithPreselEff) {
1109 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" you have opted for scaling the number of requested training/testing events\n to be scaled by the preselection efficiency"<<
Endl;
1113 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" you have opted for interpreting the requested number of training/testing events\n to be the number of events AFTER your preselection cuts" <<
Endl;
1124 else if(
eventCounts[
cls].TrainTestSplitRequested != 0.0) Log() << kFATAL <<
Form(
"The option TrainTestSplit_<class> has to be in range (0, 1] but is set to %f.",
eventCounts[
cls].TrainTestSplitRequested) <<
Endl;
1219 Log() << kFATAL <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"More events requested for training ("
1230 Log() << kFATAL <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"More events requested for testing ("
1248 if (NFree <0) NFree = 0;
1253 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"determined event sample size to select training sample from="<<
useForTraining<<
Endl;
1254 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"determined event sample size to select test sample from="<<
useForTesting<<
Endl;
1260 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"split 'ALTERNATE'" <<
Endl;
1274 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"split '" <<
splitMode <<
"'" <<
Endl;
1286 Log() << kFATAL <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"More events requested than available!" <<
Endl;
1340 Log() << kWARNING <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"DataSetFactory/requested number of training samples larger than size of eventVectorTraining.\n"
1341 <<
"There is probably an issue. Please contact the TMVA developers." <<
Endl;
1347 Log() << kWARNING <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"DataSetFactory/requested number of testing samples larger than size of eventVectorTesting.\n"
1348 <<
"There is probably an issue. Please contact the TMVA developers." <<
Endl;
1380 Log() << kDEBUG <<
" MIXING ============= " <<
Endl;
1387 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"Training sample: You are trying to mix events in alternate mode although the classes have different event numbers. This works but the alternation stops at the last event of the smaller class."<<
Endl;
1390 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"Testing sample: You are trying to mix events in alternate mode although the classes have different event numbers. This works but the alternation stops at the last event of the smaller class."<<
Endl;
1393 typedef EventVector::iterator
EvtVecIt;
1397 Log() << kDEBUG <<
"insert class 0 into training and test vector" <<
Endl;
1404 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"insert class " <<
cls <<
Endl;
1447 Log() << kDEBUG <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"shuffling events"<<
Endl;
1465 if (
ds->GetNTrainingEvents() < 1){
1466 Log() << kFATAL <<
"Dataset " << std::string(
dsi.GetName()) <<
" does not have any training events, I better stop here and let you fix that one first " <<
Endl;
1469 if (
ds->GetNTestEvents() < 1) {
1470 Log() << kERROR <<
"Dataset " << std::string(
dsi.GetName()) <<
" does not have any testing events, guess that will cause problems later..but for now, I continue " <<
Endl;
1536 if (
cls ==
dsi.GetSignalClassIndex()){
1562 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"No weight renormalisation applied: use original global and event weights" <<
Endl;
1568 else if (
normMode ==
"NUMEVENTS") {
1570 <<
"\tWeight renormalisation mode: \"NumEvents\": renormalises all event classes " <<
Endl;
1572 <<
" such that the effective (weighted) number of events in each class equals the respective " <<
Endl;
1574 <<
" number of events (entries) that you demanded in PrepareTrainingAndTestTree(\"\",\"nTrain_Signal=.. )" <<
Endl;
1576 <<
" ... i.e. such that Sum[i=1..N_j]{w_i} = N_j, j=0,1,2..." <<
Endl;
1578 <<
" ... (note that N_j is the sum of TRAINING events (nTrain_j...with j=Signal,Background.." <<
Endl;
1580 <<
" ..... Testing events are not renormalised nor included in the renormalisation factor! )"<<
Endl;
1590 else if (
normMode ==
"EQUALNUMEVENTS") {
1596 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"Weight renormalisation mode: \"EqualNumEvents\": renormalises all event classes ..." <<
Endl;
1597 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" such that the effective (weighted) number of events in each class is the same " <<
Endl;
1598 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" (and equals the number of events (entries) given for class=0 )" <<
Endl;
1599 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"... i.e. such that Sum[i=1..N_j]{w_i} = N_classA, j=classA, classB, ..." <<
Endl;
1600 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
"... (note that N_j is the sum of TRAINING events" <<
Endl;
1601 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) <<
" ..... Testing events are not renormalised nor included in the renormalisation factor!)" <<
Endl;
1611 Log() << kFATAL <<
Form(
"Dataset[%s] : ",
dsi.GetName())<<
"<PrepareForTrainingAndTesting> Unknown NormMode: " <<
normMode <<
Endl;
1619 <<
"--> Rescale " << setiosflags(ios::left) << std::setw(
maxL)
1634 <<
"Number of training and testing events" <<
Endl;
1635 Log() << kDEBUG <<
"\tafter rescaling:" <<
Endl;
1637 <<
"---------------------------------------------------------------------------" <<
Endl;
1655 if (
cls ==
dsi.GetSignalClassIndex()){
1666 << setiosflags(ios::left) << std::setw(
maxL)
1667 <<
dsi.GetClassInfo(
cls)->GetName() <<
" -- "
1670 <<
" - requested were " <<
eventCounts[
cls].nTrainingEventsRequested <<
" events" <<
Endl;
1672 << setiosflags(ios::left) << std::setw(
maxL)
1673 <<
dsi.GetClassInfo(
cls)->GetName() <<
" -- "
1676 <<
" - requested were " <<
eventCounts[
cls].nTestingEventsRequested <<
" events" <<
Endl;
1678 << setiosflags(ios::left) << std::setw(
maxL)
1679 <<
dsi.GetClassInfo(
cls)->GetName() <<
" -- "
1680 <<
"training and testing events: "
1682 Log() << kDEBUG <<
"\t(sum of weights: "
1685 Log() << kINFO <<
Form(
"Dataset[%s] : ",
dsi.GetName()) << setiosflags(ios::left) << std::setw(
maxL)
1686 <<
dsi.GetClassInfo(
cls)->GetName() <<
" -- "
1687 <<
"due to the preselection a scaling factor has been applied to the numbers of requested events: "
1691 Log() << kINFO <<
Endl;
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
TMatrixT< Double_t > TMatrixD
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
const_iterator begin() const
const_iterator end() const
A specialized string object used for TTree selections.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
~DataSetFactory()
destructor
DataSet * BuildInitialDataSet(DataSetInfo &, TMVA::DataInputHandler &)
if no entries, then create a DataSet with one Event which uses dynamic variables (pointers to variabl...
DataSetFactory()
constructor
std::map< Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType
void ChangeToNewTree(TreeInfo &, const DataSetInfo &)
While the data gets copied into the local training and testing trees, the input tree can change (for ...
void BuildEventVector(DataSetInfo &dsi, DataInputHandler &dataInput, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts)
build empty event vectors; distributes events between kTraining/kTesting/kMaxTreeType
DataSet * CreateDataSet(DataSetInfo &, DataInputHandler &)
steering the creation of a new dataset
DataSet * MixEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts, const TString &splitMode, const TString &mixMode, const TString &normMode, UInt_t splitSeed)
Select and distribute unassigned events to kTraining and kTesting.
std::vector< int > NumberPerClass
std::vector< EventVector > EventVectorOfClasses
void InitOptions(DataSetInfo &dsi, EvtStatsPerClass &eventsmap, TString &normMode, UInt_t &splitSeed, TString &splitMode, TString &mixMode)
the dataset splitting
void CalcMinMax(DataSet *, DataSetInfo &dsi)
compute covariance matrix
std::vector< Double_t > ValuePerClass
DataSet * BuildDynamicDataSet(DataSetInfo &)
std::vector< EventStats > EvtStatsPerClass
Bool_t CheckTTreeFormula(TTreeFormula *ttf, const TString &expression, Bool_t &hasDollar)
checks a TTreeFormula for problems
void RenormEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, const EvtStatsPerClass &eventCounts, const TString &normMode)
renormalisation of the TRAINING event weights
TMatrixD * CalcCorrelationMatrix(DataSet *, const UInt_t classNumber)
computes correlation matrix for variables "theVars" in tree; "theType" defines the required event "ty...
TMatrixD * CalcCovarianceMatrix(DataSet *, const UInt_t classNumber)
compute covariance matrix
std::vector< Event * > EventVector
Class that contains all the data information.
Class that contains all the data information.
ostringstream derivative to redirect and format output
@ kMaxTreeType
also used as temporary storage for trees not yet assigned for testing;training...
const char * GetTitle() const override
Returns title of object.
const char * Data() const
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
A TTree represents a columnar dataset.
create variable transformations
Int_t LargestCommonDivider(Int_t a, Int_t b)
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Int_t Finite(Double_t x)
Check if it is finite with a mask in order to be consistent in presence of fast math.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.