14RSampler::RSampler(std::vector<RFlat2DMatrix> &datasets,
const std::string &sampleType,
float sampleRatio,
15 bool replacement,
bool shuffle, std::size_t setSeed)
68 auto minRatio = std::to_string(std::round(
double(
fNumMinor) /
double(
fNumMajor) * 100.0) / 100.0);
69 minRatio.erase(minRatio.find(
'.') + 3);
70 throw std::invalid_argument(
71 "The sampling_ratio is too low: not enough entries in the majority class to sample from.\n"
72 "Choose sampling_ratio > " +
73 minRatio +
" or set replacement to True.");
109 std::size_t cols =
fDatasets[0].GetCols();
113 std::size_t index = 0;
117 UndersampledMajorTensor.
GetData() + index * cols);
132 std::size_t cols =
fDatasets[0].GetCols();
136 std::size_t index = 0;
140 OversampledMinorTensor.
GetData() + index * cols);
154 std::uniform_int_distribution<> dist(0, max - 1);
157 for (std::size_t i = 0; i < n_samples; ++i) {
160 std::random_device rd;
185 std::vector<std::size_t> UniqueSamples;
186 UniqueSamples.reserve(max);
190 for (std::size_t i = 0; i < max; ++i)
191 UniqueSamples.push_back(i);
194 std::random_device rd;
202 std::shuffle(UniqueSamples.begin(), UniqueSamples.end(),
g);
205 for (std::size_t i = 0; i < n_samples; ++i) {
206 fSamples.push_back(UniqueSamples[i]);
void SampleWithoutReplacement(std::size_t n_samples, std::size_t max)
Add indices without replacement to fSamples.
void SetupRandomUndersampler()
Calculate fNumEntries and major/minor variables for the random undersampler.
void RandomOversampler(RFlat2DMatrix &ShuffledTensor)
Oversample entries randomly from the minority dataset.
void SampleWithReplacement(std::size_t n_samples, std::size_t max)
Add indices with replacement to fSamples.
void SetupRandomOversampler()
Calculate fNumEntries and major/minor variables for the random oversampler.
void SetupSampler()
Calculate fNumEntries and major/minor variables.
std::vector< std::size_t > fSamples
std::unique_ptr< RFlat2DMatrixOperators > fTensorOperators
RSampler(std::vector< RFlat2DMatrix > &datasets, const std::string &sampleType, float sampleRatio, bool replacement=false, bool shuffle=true, std::size_t setSeed=0)
void RandomUndersampler(RFlat2DMatrix &ShuffledTensor)
Undersample entries randomly from the majority dataset.
std::size_t fNumResampledMajor
std::vector< RFlat2DMatrix > & fDatasets
void Sampler(RFlat2DMatrix &SampledTensor)
Collection of sampling types.
std::size_t fNumResampledMinor
Wrapper around ROOT::RVec<float> representing a 2D matrix.