60 Log() << kFATAL <<
"DataSet prepared for \"" << fNumFolds <<
"\" folds, requested fold \"" <<
foldNumber
61 <<
"\" is outside of range." <<
Endl;
70 [&](
UInt_t sum, std::vector<TMVA::Event *>
v) { return sum + v.size(); });
93 Log() << kDEBUG <<
"Fold prepared, num events in training set: " <<
tempTrain.size() <<
Endl;
94 Log() << kDEBUG <<
"Fold prepared, num events in test set: " <<
tempTest.size() <<
Endl;
106 Log() << kFATAL <<
"PrepareFoldDataSet can only work with training and testing data sets." << std::endl;
117 Log() << kFATAL <<
"Only kTraining is supported for CvSplit::RecombineKFoldDataSet currently." << std::endl;
120 std::vector<Event *> *
tempVec =
new std::vector<Event *>;
122 for (
UInt_t i = 0; i < fNumFolds; ++i) {
123 tempVec->insert(
tempVec->
end(), fTrainEvents.at(i).begin(), fTrainEvents.at(i).end());
140 : fDsi(
dsi), fIdxFormulaParNumFolds(std::numeric_limits<
Int_t>::max()), fSplitFormula(
"",
expr),
141 fParValues(fSplitFormula.GetNpar())
144 throw std::runtime_error(
"Split expression \"" + std::string(
fSplitExpr.
Data()) +
"\" is not a valid TFormula.");
152 if (
name ==
"NumFolds" ||
name ==
"numFolds") {
166 for (
auto &
p : fFormulaParIdxToDsiSpecIdx) {
173 if (fIdxFormulaParNumFolds < fSplitFormula.GetNpar()) {
174 fParValues[fIdxFormulaParNumFolds] =
numFolds;
183 throw std::runtime_error(
"Output of splitExpr must be non-negative.");
188 throw std::runtime_error(
"Output of splitExpr should be a non-negative"
189 "integer between 0 and numFolds-1 inclusive.");
212 if (
vi.GetName() ==
name) {
214 }
else if (
vi.GetLabel() ==
name) {
216 }
else if (
vi.GetExpression() ==
name) {
221 throw std::runtime_error(
"Spectator \"" + std::string(
name.Data()) +
"\" not found.");
260 if (fSplitExprString !=
TString(
"")) {
265 if (fMakeFoldDataSet) {
266 Log() << kINFO <<
"Splitting in k-folds has been already done" <<
Endl;
270 fMakeFoldDataSet =
kTRUE;
279 fTrainEvents = SplitSets(
trainData, fNumFolds, numClasses);
280 fTestEvents = SplitSets(
testData, fNumFolds, numClasses);
319std::vector<std::vector<TMVA::Event *>>
325 std::vector<std::vector<Event *>>
tempSets;
352 fEventToFoldMapping[
ev] =
iFold;
356 std::vector<std::vector<TMVA::Event *>>
oldSets;
372 for(
UInt_t i = 0; i<numClasses; ++i){
378 for(
UInt_t i = 0; i<numClasses; ++i) {
386 fEventToFoldMapping[
ev] =
iFold;
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
winID h TVirtualViewer3D TVirtualGLPainter p
const_iterator begin() const
const_iterator end() const
Int_t fIdxFormulaParNumFolds
Maps parameter indices in splitExpr to their spectator index in the datasetinfo.
UInt_t Eval(UInt_t numFolds, const Event *ev)
std::vector< std::pair< Int_t, Int_t > > fFormulaParIdxToDsiSpecIdx
UInt_t GetSpectatorIndexForName(DataSetInfo &dsi, TString name)
static Bool_t Validate(TString expr)
CvSplitKFoldsExpr(DataSetInfo &dsi, TString expr)
TFormula fSplitFormula
Expression used to split data into folds. Should output integer values between 0 and numFolds-1 inclusive.
TString fSplitExpr
Keeps track of the index of reserved par "NumFolds" in splitExpr.
std::vector< UInt_t > GetEventIndexToFoldMapping(UInt_t nEntries, UInt_t numFolds, UInt_t seed=100)
Generates a vector of fold assignments.
void MakeKFoldDataSet(DataSetInfo &dsi) override
Prepares a DataSet for cross validation.
std::vector< std::vector< Event * > > SplitSets(std::vector< TMVA::Event * > &oldSet, UInt_t numFolds, UInt_t numClasses)
Split sets into k-folds.
TString fSplitExprString
Expression used to split data into folds. Should output values between 0 and numFolds.
CvSplitKFolds(UInt_t numFolds, TString splitExpr="", Bool_t stratified=kTRUE, UInt_t seed=100)
Splits a dataset into k folds, ready for use in cross validation.
virtual void RecombineKFoldDataSet(DataSetInfo &dsi, Types::ETreeType tt=Types::kTraining)
virtual void PrepareFoldDataSet(DataSetInfo &dsi, UInt_t foldNumber, Types::ETreeType tt)
Set training and test set vectors of dataset described by dsi.
Class that contains all the data information.
Class for type info of MVA input variable.
const char * Data() const
MsgLogger & Endl(MsgLogger &ml)
static uint64_t sum(uint64_t i)