// Shorthand for the scalar type exported by the Architecture_t backend; it is
// the return type of the training and loss-returning step functions.
58 using Scalar_t =
typename Architecture_t::Scalar_t;
// Shorthand for the matrix type exported by the Architecture_t backend; it is
// the type of the input/output/weight arguments of the single-net steps.
59 using Matrix_t =
typename Architecture_t::Matrix_t;
77 size_t convergenceSteps,
/// Train the given net using the given training data and evaluate it on the
/// given test data.
///
/// \tparam Data_t  Type of the training and test data sets.
/// \tparam Net_t   Type of the network; the definition in this file calls
///                 CreateClone(), GetBatchSize(), GetDepth() and GetLayer() on it.
/// \param TrainingDataIn    Training data set.
/// \param nTrainingSamples  Number of training samples. The definition derives
///                          the batches per epoch as
///                          nTrainingSamples / net.GetBatchSize().
/// \param TestDataIn        Test data set.
/// \param nTestSamples      Number of test samples. The definition passes it to
///                          net.CreateClone() to build the net used for the
///                          test loss.
/// \param net               Master network; its weights and biases are copied
///                          into the per-thread nets before training.
/// \param nThreads          Number of net copies created, and number of batches
///                          handed to each multi-net Step call. Defaults to 1.
/// \return A Scalar_t. NOTE(review): presumably the final test error; the
///         return statement is not visible here, so confirm.
90 template <
typename Data_t,
typename Net_t>
91 Scalar_t
Train(
const Data_t & TrainingDataIn,
size_t nTrainingSamples,
92 const Data_t & TestDataIn,
size_t nTestSamples,
93 Net_t & net,
size_t nThreads = 1);
/// Variant of Train() that takes an additional momentum argument.
///
/// \tparam Data_t  Type of the training and test data sets.
/// \tparam Net_t   Type of the network being trained.
/// \param TrainingDataIn    Training data set.
/// \param nTrainingSamples  Number of training samples. The definition derives
///                          the batches per epoch as
///                          nTrainingSamples / net.GetBatchSize().
/// \param TestDataIn        Test data set.
/// \param nTestSamples      Number of test samples.
///                          NOTE(review): the visible part of the definition
///                          builds the test net with
///                          net.CreateClone(net.GetBatchSize()) and bounds the
///                          test-loss loop by the training batch count, so this
///                          argument may be unused there. Confirm.
/// \param net               Master network; InitializeGradients() is called on
///                          it and its weights and biases are copied into the
///                          per-thread nets.
/// \param momentum          Momentum value. The definition branches on
///                          `momentum != 0.0`; the plain multi-net Step call
///                          appears in that region, presumably as the
///                          zero-momentum path. Confirm.
/// \param nThreads          Number of net copies created, and number of batches
///                          handed to each step. Defaults to 1.
/// \return A Scalar_t. NOTE(review): presumably the test error; the return
///         statement is not visible here, so confirm.
96 template <
typename Data_t,
typename Net_t>
97 Scalar_t
TrainMomentum(
const Data_t & TrainingDataIn,
size_t nTrainingSamples,
98 const Data_t & TestDataIn,
size_t nTestSamples,
99 Net_t & net, Scalar_t momentum,
size_t nThreads = 1);
/// Single-net step overload, taking one batch as explicit matrices.
/// NOTE(review): presumably performs one optimization step of `net` on this
/// batch; the definition is not visible here, so confirm.
///
/// \tparam Net_t  Type of the network.
/// \param net      Network to operate on (taken by non-const reference).
/// \param input    Batch input matrix. Taken by non-const reference, unlike
///                 `output` and `weights`.
/// \param output   Matrix passed alongside the input; presumably the expected
///                 outputs (labels) for the batch. Confirm.
/// \param weights  Matrix passed alongside the input; presumably per-event
///                 weights. Confirm.
106 template <
typename Net_t>
107 void Step(Net_t &net, Matrix_t &input,
const Matrix_t &output,
const Matrix_t &weights);
/// Same parameter list as the single-net Step(), but returns a Scalar_t.
/// NOTE(review): going by the name, presumably performs the step and returns
/// the loss for this batch; the definition is not visible here, so confirm
/// whether the loss is evaluated before or after the update.
///
/// \tparam Net_t  Type of the network.
/// \param net      Network to operate on (taken by non-const reference).
/// \param input    Batch input matrix (non-const reference).
/// \param output   Presumably the expected outputs for the batch. Confirm.
/// \param weights  Presumably the per-event weights for the batch. Confirm.
/// \return A Scalar_t; presumably the loss value. Confirm.
111 template <
typename Net_t>
112 Scalar_t
StepLoss(Net_t &net, Matrix_t &input,
const Matrix_t &output,
const Matrix_t &weights);
120 template <
typename Net_t>
121 void Step(Net_t &master,
122 std::vector<Net_t> &nets,
126 template <
typename Net_t>
128 std::vector<Net_t> &nets,
131 template <
typename Net_t>
136 std::vector<Net_t> &nets,
143 template <
typename Net_t>
/// Same parameter list and return type as StepLoss().
/// NOTE(review): going by the name, presumably a step that updates only a
/// reduced set of weights and returns the loss; the definition is not visible
/// here, so confirm which weights are touched.
///
/// \tparam Net_t  Type of the network.
/// \param net      Network to operate on (taken by non-const reference).
/// \param input    Batch input matrix (non-const reference).
/// \param output   Presumably the expected outputs for the batch. Confirm.
/// \param weights  Presumably the per-event weights for the batch. Confirm.
/// \return A Scalar_t; presumably the loss value. Confirm.
148 template <
typename Net_t>
149 Scalar_t
StepReducedWeightsLoss(Net_t &net, Matrix_t &input,
const Matrix_t &output,
const Matrix_t &weights);
197 size_t nTrainingSamples,
198 const Data_t & testData,
210 net.GetOutputWidth(), nThreads);
211 auto testNet = net.CreateClone(nTestSamples);
213 testNet.GetBatchSize(),
214 testNet.GetInputWidth(),
215 net.GetOutputWidth());
216 std::vector<Net_t> nets{};
217 nets.reserve(nThreads);
218 for (
size_t i = 0; i < nThreads; i++) {
220 for (
size_t j = 0; j < net.GetDepth(); j++)
222 auto &masterLayer = net.GetLayer(j);
223 auto &layer = nets.back().GetLayer(j);
224 Architecture_t::Copy(layer.GetWeights(),
225 masterLayer.GetWeights());
226 Architecture_t::Copy(layer.GetBiases(),
227 masterLayer.GetBiases());
231 size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
232 std::vector<TBatch<Architecture_t>> batches{};
233 batches.reserve(nThreads);
238 for (
size_t i = 0; i < batchesInEpoch; i += nThreads) {
240 for (
size_t j = 0; j < nThreads; j++) batches.push_back(trainLoader.
GetBatch());
241 Step(net, nets, batches);
245 auto b = *testLoader.
begin();
246 auto inputMatrix =
b.GetInput();
247 auto outputMatrix =
b.GetOutput();
248 auto weightMatrix =
b.GetWeights();
249 fTestError = testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
260 size_t nTrainingSamples,
261 const Data_t & testData,
274 net.GetOutputWidth(), nThreads);
275 auto testNet = net.CreateClone(net.GetBatchSize());
277 testNet.GetBatchSize(),
278 testNet.GetInputWidth(),
279 net.GetOutputWidth());
281 net.InitializeGradients();
282 std::vector<Net_t> nets{};
283 nets.reserve(nThreads);
284 for (
size_t i = 0; i < nThreads; i++) {
286 for (
size_t j = 0; j < net.GetDepth(); j++)
288 auto &masterLayer = net.GetLayer(j);
289 auto &layer = nets.back().GetLayer(j);
290 Architecture_t::Copy(layer.GetWeights(),
291 masterLayer.GetWeights());
292 Architecture_t::Copy(layer.GetBiases(),
293 masterLayer.GetBiases());
297 size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
298 std::vector<TBatch<Architecture_t>> batches{};
299 batches.reserve(nThreads);
304 for (
size_t i = 0; i < batchesInEpoch; i += nThreads) {
306 for (
size_t j = 0; j < nThreads; j++) batches.push_back(trainLoader.
GetBatch());
307 if (momentum != 0.0) {
310 Step(net, nets, batches);
316 for (
size_t i = 0; i < batchesInEpoch; i++) {
318 auto inputMatrix =
b.GetInput();
319 auto outputMatrix =
b.GetOutput();
320 auto weightMatrix =
b.GetWeights();
321 fTestError += testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
376 std::vector<Net_t> & nets,
379 typename Architecture_t::Matrix_t dummy(0,0);
380 size_t depth = master.GetDepth();
383 for (
size_t j = 0; j < nets.size(); j++) {
384 nets[j].GetLayer(0).Forward(batches[j].GetInput(),
true);
387 for (
size_t i = 1; i < depth; i++)
389 for (
size_t j = 0; j < nets.size(); j++) {
390 nets[j].GetLayer(i).Forward(nets[j].GetLayer(i-1).GetOutput(),
true);
394 for (
size_t j = 0; j < nets.size(); j++) {
396 batches[j].GetOutput(), nets[j].GetLayer(depth - 1).GetOutput(),
397 batches[j].GetWeights());
400 for (
size_t i = depth - 1; i > 0; i--)
402 for (
size_t j = 0; j < nets.size(); j++) {
403 nets[j].GetLayer(i).Backward(nets[j].GetLayer(i-1).GetActivationGradients(),
404 nets[j].GetLayer(i-1).GetOutput(),
405 nets[j].GetRegularization(),
406 nets[j].GetWeightDecay());
409 for (
size_t j = 0; j < nets.size(); j++) {
410 nets[j].GetLayer(0).Backward(dummy,
411 batches[j].GetInput(),
412 nets[j].GetRegularization(),
413 nets[j].GetWeightDecay());
416 for (
size_t j = 0; j < nets.size(); j++) {
417 for (
size_t i = 0; i < depth; i++)
419 auto &masterLayer = master.GetLayer(i);
420 auto &layer = nets[j].GetLayer(i);
421 Architecture_t::ScaleAdd(masterLayer.GetWeights(),
422 layer.GetWeightGradients(),
424 Architecture_t::Copy(layer.GetWeights(),
425 masterLayer.GetWeights());
426 Architecture_t::ScaleAdd(masterLayer.GetBiases(),
427 layer.GetBiasGradients(),
429 Architecture_t::Copy(layer.GetBiases(),
430 masterLayer.GetBiases());
440 std::vector<Net_t> & nets,
444 typename Architecture_t::Matrix_t dummy(0,0);
445 size_t depth = master.GetDepth();
448 for (
size_t j = 0; j < nets.size(); j++) {
449 nets[j].GetLayer(0).Forward(batches[j].GetInput(),
true);
452 for (
size_t i = 1; i < depth; i++)
454 for (
size_t j = 0; j < nets.size(); j++) {
455 nets[j].GetLayer(i).Forward(nets[j].GetLayer(i-1).GetOutput(),
true);
459 for (
size_t j = 0; j < nets.size(); j++) {
461 batches[j].GetOutput(), nets[j].GetLayer(depth - 1).GetOutput(),
462 batches[j].GetWeights());
465 for (
size_t i = depth - 1; i > 0; i--)
467 for (
size_t j = 0; j < nets.size(); j++) {
468 nets[j].GetLayer(i).Backward(nets[j].GetLayer(i-1).GetActivationGradients(),
469 nets[j].GetLayer(i-1).GetOutput(),
470 nets[j].GetRegularization(),
471 nets[j].GetWeightDecay());
472 Architecture_t::ScaleAdd(master.GetLayer(i).GetWeightGradients(),
473 nets[j].GetLayer(i).GetWeightGradients(),
475 Architecture_t::ScaleAdd(master.GetLayer(i).GetBiasGradients(),
476 nets[j].GetLayer(i).GetBiasGradients(),
479 Architecture_t::ScaleAdd(master.GetLayer(i).GetWeightGradients(),
480 master.GetLayer(i).GetWeightGradients(),
482 Architecture_t::ScaleAdd(master.GetLayer(i).GetBiasGradients(),
483 master.GetLayer(i).GetBiasGradients(),
486 for (
size_t j = 0; j < nets.size(); j++) {
487 nets[j].GetLayer(0).Backward(dummy,
488 batches[j].GetInput(),
489 nets[j].GetRegularization(),
490 nets[j].GetWeightDecay());
491 Architecture_t::ScaleAdd(master.GetLayer(0).GetWeightGradients(),
492 nets[j].GetLayer(0).GetWeightGradients(),
494 Architecture_t::ScaleAdd(master.GetLayer(0).GetBiasGradients(),
495 nets[j].GetLayer(0).GetBiasGradients(),
499 Architecture_t::ScaleAdd(master.GetLayer(0).GetWeightGradients(),
500 master.GetLayer(0).GetWeightGradients(),
502 Architecture_t::ScaleAdd(master.GetLayer(0).GetBiasGradients(),
503 master.GetLayer(0).GetBiasGradients(),
506 for (
size_t i = 0; i < depth; i++)
508 auto &masterLayer = master.GetLayer(i);
509 Architecture_t::ScaleAdd(masterLayer.GetWeights(),
510 masterLayer.GetWeightGradients(),
512 Architecture_t::ScaleAdd(masterLayer.GetBiases(),
513 masterLayer.GetBiasGradients(),
515 for (
size_t j = 0; j < nets.size(); j++) {
516 auto &layer = nets[j].GetLayer(i);
517 Architecture_t::Copy(layer.GetWeights(),
518 masterLayer.GetWeights());
519 Architecture_t::Copy(layer.GetBiases(),
520 masterLayer.GetBiases());
530 std::vector<Net_t> & nets,
534 typename Architecture_t::Matrix_t dummy(0,0);
535 size_t depth = master.GetDepth();
538 for (
size_t j = 0; j < nets.size(); j++) {
539 nets[j].GetLayer(0).Forward(batches[j].GetInput(),
true);
542 for (
size_t i = 1; i < depth; i++)
544 for (
size_t j = 0; j < nets.size(); j++) {
545 nets[j].GetLayer(i).Forward(nets[j].GetLayer(i-1).GetOutput(),
true);
550 for (
size_t j = 0; j < nets.size(); j++) {
552 batches[j].GetOutput(), nets[j].GetLayer(depth - 1).GetOutput(),
553 batches[j].GetWeights());
557 for (
size_t i = depth - 1; i > 0; i--)
559 for (
size_t j = 0; j < nets.size(); j++) {
560 nets[j].GetLayer(i).Backward(nets[j].GetLayer(i-1).GetActivationGradients(),
561 nets[j].GetLayer(i-1).GetOutput(),
562 nets[j].GetRegularization(),
563 nets[j].GetWeightDecay());
567 for (
size_t j = 0; j < nets.size(); j++) {
568 nets[j].GetLayer(0).Backward(dummy,
569 batches[j].GetInput(),
570 nets[j].GetRegularization(),
571 nets[j].GetWeightDecay());
574 for (
size_t i = 0; i < depth; i++)
576 auto &masterLayer = master.GetLayer(i);
577 for (
size_t j = 0; j < nets.size(); j++) {
578 auto &layer = nets[j].GetLayer(i);
579 Architecture_t::Copy(layer.GetWeights(),
580 masterLayer.GetWeights());
581 Architecture_t::Copy(layer.GetBiases(),
582 masterLayer.GetBiases());
583 Architecture_t::ScaleAdd(layer.GetWeights(),
584 masterLayer.GetWeightGradients(),
586 Architecture_t::ScaleAdd(layer.GetBiases(),
587 masterLayer.GetBiasGradients(),
590 for (
size_t j = 0; j < nets.size(); j++) {
591 auto &layer = nets[j].GetLayer(i);
592 Architecture_t::ScaleAdd(masterLayer.GetWeightGradients(),
593 layer.GetWeightGradients(),
595 Architecture_t::ScaleAdd(masterLayer.GetBiasGradients(),
596 layer.GetBiasGradients(),
599 Architecture_t::ScaleAdd(masterLayer.GetWeightGradients(),
600 masterLayer.GetWeightGradients(),
602 Architecture_t::ScaleAdd(masterLayer.GetBiasGradients(),
603 masterLayer.GetBiasGradients(),
605 Architecture_t::ScaleAdd(masterLayer.GetWeights(),
606 masterLayer.GetWeightGradients(),
608 Architecture_t::ScaleAdd(masterLayer.GetBiases(),
609 masterLayer.GetBiasGradients(),
Scalar_t Train(const Data_t &TrainingDataIn, size_t nTrainingSamples, const Data_t &TestDataIn, size_t nTestSamples, Net_t &net, size_t nThreads=1)
Train the given net using the given training input data (events), training output data (labels),...