113 const std::size_t batchesInMemory,
const std::vector<std::string> &cols,
114 const std::vector<std::size_t> &vecSizes = {},
const float vecPadding = 0.0,
115 const float testSize = 0.0,
bool shuffle =
true,
bool dropRemainder =
true,
116 const std::size_t setSeed = 0,
bool loadEager =
false, std::string sampleType =
"",
117 float sampleRatio = 1.0,
bool replacement =
false)
301 auto validationEmpty = [&] {
311 const std::size_t numTrainingClusters =
fClusterLoader->GetNumTrainingClusters();
327 if (validationEmpty()) {
343 std::vector<RClusterRange> trainClustersToLoad;
// NOTE(review): `auto` initialised from the literal `0` deduces `int`, so the running total of
// cluster entries is kept in a signed int. It is converted with static_cast<std::size_t> before
// being compared against rowOffset further down. If GetNumEntries() returns std::size_t (its
// declaration is not visible here -- TODO confirm), declaring this variable as std::size_t would
// avoid the implicit narrowing on `+=` and make that cast unnecessary.
344 auto accumulatedEntries = 0;
347 (!discovering || trainClustersToLoad.empty())) {
// Queue the cluster for loading and add its entry count to the running total.
349 trainClustersToLoad.push_back(cluster);
350 accumulatedEntries += cluster.GetNumEntries();
// Offset handed to each load call as its last argument and advanced by every cluster's entry
// count; presumably the destination row inside stagingBuffer -- TODO confirm against the loader.
360 std::size_t rowOffset = 0;
// The cluster is taken by non-const reference because SetNumEntries() is called on it below.
362 for (
auto &cluster : trainClustersToLoad) {
363 auto loadedEntries =
fClusterLoader->LoadTrainingClusterInto(stagingBuffer, cluster.rdfIdx,
364 cluster.start, cluster.end, rowOffset);
// Overwrite the cluster's entry count with the value returned by the loader.
368 cluster.SetNumEntries(loadedEntries);
370 rowOffset += cluster.GetNumEntries();
// accumulatedEntries is an `int` (deduced from `auto ... = 0`), hence the explicit cast to
// std::size_t so the comparison with rowOffset is done on unsigned operands.
380 if (rowOffset <
static_cast<std::size_t
>(accumulatedEntries)) {
391 if (isLastBuffer && discovering) {
399 const std::size_t numValidationClusters =
fClusterLoader->GetNumValidationClusters();
422 std::vector<RClusterRange> valClustersToLoad;
// NOTE(review): `auto` initialised from the literal `0` deduces `int`. If GetNumEntries() returns
// an unsigned/wider type such as std::size_t (not visible here -- TODO confirm), the `+=` below
// narrows implicitly; std::size_t would be the safer type for this running total.
423 auto accumulatedEntries = 0;
// Queue the cluster for loading and add its entry count to the running total.
426 valClustersToLoad.push_back(cluster);
427 accumulatedEntries += cluster.GetNumEntries();
435 std::size_t rowOffset = 0;
437 for (
const auto &cluster : valClustersToLoad) {
438 fClusterLoader->LoadValidationClusterInto(stagingBuffer, cluster.rdfIdx, cluster.start, cluster.end,
440 rowOffset += cluster.GetNumEntries();