Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RChunkConstructor.hxx
Go to the documentation of this file.
1// Author: Martin Føll, University of Oslo (UiO) & CERN 05/2025
2
3/*************************************************************************
4 * Copyright (C) 1995-2025, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_INTERNAL_ML_RCHUNKCONSTRUCTOR
12#define ROOT_INTERNAL_ML_RCHUNKCONSTRUCTOR
13
14#include <utility>
15#include <vector>
16
17#include "Rtypes.h"
18
20/**
21\class ROOT::Experimental::Internal::ML::RChunkConstructor
22
23\brief The logic for constructing chunks from a dataset.
24
25This struct handles the logic for splitting a dataset into smaller subsets
26known as chunks, which are constructed from blocks.
27
28A chunk is the largest portion of the dataset loaded into memory at once,
29and each chunk is further divided into batches for machine learning training.
30
31The dataset is split into disjoint chunks based on a user-defined chunk size.
32There are two types of chunks:
33 - Full chunks: contain exactly the number of entries specified by the chunk size.
34 - Leftover chunk: contains any remaining entries that don't make up a full chunk.
35
36Each chunk is constructed from blocks based on a user-defined block size.
37There are two types of blocks:
38 - Full blocks: contain exactly the number of entries specified by the block size.
39 - Leftover block: contains any remaining entries that don't make up a full block.
40
41The blocks are defined by their start and end entries, which correspond to positions within the dataset’s total number
42of entries.
43*/
44
46 std::size_t fNumEntries{};
47 std::size_t fChunkSize{};
48 std::size_t fBlockSize{};
49
50 // size of full and leftover chunks
51 std::size_t SizeOfFullChunk;
53
54 // size of full and leftover blocks in a full and leftover chunk
59
60 // number of full, leftover and total chunks
61 std::size_t FullChunks;
62 std::size_t LeftoverChunks;
63 std::size_t Chunks;
64
65 // number of full, leftover and total blocks in a full chunk
68 std::size_t BlockPerFullChunk;
69
70 // number of full, leftover and total blocks in the leftover chunk
74
75 // total number of full and leftover blocks in the full chunks
78
79 // total number of full and leftover blocks in the leftover chunks
82
83 // vector of the different block sizes
84 std::vector<std::size_t> SizeOfBlocks;
85
86 // vector with the number of blocks of each different type
87 std::vector<std::size_t> NumberOfDifferentBlocks;
88
89 // total number of blocks
90 std::size_t NumberOfBlocks;
91
92 // pairs of start and end entries for the different block types
93 std::vector<std::pair<Long_t, Long_t>> BlockIntervals;
94
95 std::vector<std::pair<Long_t, Long_t>> FullBlockIntervalsInFullChunks;
96 std::vector<std::pair<Long_t, Long_t>> LeftoverBlockIntervalsInFullChunks;
97
98 std::vector<std::pair<Long_t, Long_t>> FullBlockIntervalsInLeftoverChunks;
99 std::vector<std::pair<Long_t, Long_t>> LeftoverBlockIntervalsInLeftoverChunks;
100
101 std::vector<std::vector<std::pair<Long_t, Long_t>>> ChunksIntervals;
102
103 std::vector<std::size_t> ChunksSizes;
104
105 RChunkConstructor(const std::size_t numEntries, const std::size_t chunkSize, const std::size_t blockSize);
106
109 void SizeOfChunks();
110};
111} // namespace ROOT::Experimental::Internal::ML
112
113#endif // ROOT_INTERNAL_ML_RCHUNKCONSTRUCTOR
The logic for constructing chunks from a dataset.
void CreateChunksIntervals()
Creates the chunks of the dataset, each consisting of blocks given by their begin and end entries.
std::vector< std::pair< Long_t, Long_t > > FullBlockIntervalsInFullChunks
std::vector< std::pair< Long_t, Long_t > > FullBlockIntervalsInLeftoverChunks
std::vector< std::vector< std::pair< Long_t, Long_t > > > ChunksIntervals
void SizeOfChunks()
Fills a vector with the size of every chunk from the dataset.
std::vector< std::pair< Long_t, Long_t > > LeftoverBlockIntervalsInFullChunks
std::vector< std::pair< Long_t, Long_t > > LeftoverBlockIntervalsInLeftoverChunks
std::vector< std::pair< Long_t, Long_t > > BlockIntervals
void DistributeBlockIntervals()
Groups the blocks by block type (full or leftover), as determined by the size of the block.
RChunkConstructor(const std::size_t numEntries, const std::size_t chunkSize, const std::size_t blockSize)