   fWeightInitialization(), fOutputFunction(), fLayoutString(), fErrorStrategy(),
   fTrainingStrategyString(), fWeightInitializationString(), fArchitectureString(),
   fTrainingSettings(), fResume(false), fSettings()
136 "Layou of the network.");
140 "Loss function: Mean squared error (regression)" 141 " or cross entropy (binary classifcation).");
146 "WeightInitialization",
147 "Weight initialization strategy");
153 "Which architecture to perfrom the training on.");
163 "ConvergenceSteps=50," 169 "DropRepetitions=5|LearningRate=1e-4," 172 "ConvergenceSteps=50," 177 "DropConfig=0.0+0.5+0.5," 179 "Multithreading=True",
181 "Defines the training strategies.");
   const TString layerDelimiter(",");
   const TString subDelimiter("|");

   const size_t inputSize = GetNvar();

   TObjArray* layerStrings = layoutString.Tokenize(layerDelimiter);
   TIter nextLayer (layerStrings);

   for (; layerString != nullptr; layerString = (TObjString*) nextLayer()) {
      EActivationFunction activationFunction = EActivationFunction::kTanh;

      TIter nextToken (subStrings);

      for (; token != nullptr; token = (TObjString *) nextToken()) {
         if (strActFnc == "RELU") {
            activationFunction = EActivationFunction::kRelu;
         }
         else if (strActFnc == "TANH") {
            activationFunction = EActivationFunction::kTanh;
         }
         else if (strActFnc == "SYMMRELU") {
            activationFunction = EActivationFunction::kSymmRelu;
         }
         else if (strActFnc == "SOFTSIGN") {
            activationFunction = EActivationFunction::kSoftSign;
         }
         else if (strActFnc == "SIGMOID") {
            activationFunction = EActivationFunction::kSigmoid;
         }
         else if (strActFnc == "LINEAR") {
            activationFunction = EActivationFunction::kIdentity;
         }
         else if (strActFnc == "GAUSS") {
            activationFunction = EActivationFunction::kGauss;
         }

         strNumNodes.ReplaceAll ("N", strN);
         strNumNodes.ReplaceAll ("n", strN);
         numNodes = fml.Eval (inputSize);

      layout.push_back(std::make_pair(numNodes, activationFunction));
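   // Illustrative sketch (assumption, not part of the original file): for a layout
   // string such as "TANH|100,TANH|50,LINEAR" the loop above yields one
   // (numNodes, activationFunction) pair per comma-separated token, e.g.
   //
   //   {100, kTanh}, {50, kTanh}, {<output>, kIdentity}
   //
   // A width expression may contain "N"/"n", which is replaced by the number of input
   // variables before TFormula evaluates it (e.g. "(N+30)*2"); the width of the output
   // layer itself is fixed later by the number of outputs.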
   const TString keyValueDelim ("=");

   TObjArray* blockStrings = parseString.Tokenize (blockDelim);
   TIter nextBlock (blockStrings);

   for (; blockString != nullptr; blockString = (TObjString *) nextBlock())
      blockKeyValues.push_back (std::map<TString,TString>());
      std::map<TString,TString>& currentBlock = blockKeyValues.back ();

      TIter nextToken (subStrings);

      for (; token != nullptr; token = (TObjString *)nextToken())
         int delimPos = strKeyValue.First (keyValueDelim.Data ());

         TString strValue = TString (strKeyValue (delimPos+1, strKeyValue.Length ()));

         currentBlock.insert (std::make_pair (strKey, strValue));

   return blockKeyValues;
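// Illustrative sketch (assumption, not from the original source): for the training
// strategy string
//
//    "LearningRate=1e-1,Momentum=0.3|LearningRate=1e-4,Momentum=0.3"
//
// with blockDelim = "|" and tokenDelim = ",", ParseKeyValueString returns a
// KeyValueVector_t containing two maps:
//
//    { {"LearningRate","1e-1"}, {"Momentum","0.3"} },
//    { {"LearningRate","1e-4"}, {"Momentum","0.3"} }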
   std::map<TString, TString>::const_iterator it = keyValueMap.find (key);
   if (it == keyValueMap.end()) {

template <typename T>
T fetchValue(const std::map<TString,TString>& keyValueMap,

   TString value (fetchValue (keyValueMap, key));

   return value.Atoi ();

double fetchValue (const std::map<TString,TString>& keyValueMap,
                   TString key,
                   double defaultValue)

   TString value (fetchValue (keyValueMap, key));

   return value.Atof ();

   TString value (fetchValue (keyValueMap, key));

bool fetchValue (const std::map<TString,TString>& keyValueMap,
                 TString key,
                 bool defaultValue)

   TString value (fetchValue (keyValueMap, key));

   if (value == "TRUE" || value == "T" || value == "1") {
std::vector<double> fetchValue(const std::map<TString, TString> & keyValueMap,
                               TString key,
                               std::vector<double> defaultValue)

   TString parseString (fetchValue (keyValueMap, key));
   if (parseString == "") {

   std::vector<double> values;

   const TString tokenDelim ("+");

   TIter nextToken (tokenStrings);

   for (; tokenString != NULL; tokenString = (TObjString*)nextToken ()) {
      std::stringstream sstr;

      sstr >> currentValue;
      values.push_back (currentValue);
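// Illustrative usage sketch (editorial, not from the original source): the fetchValue
// overloads above read typed settings out of one parsed key/value block, falling back
// to the given default when the key is absent. The default values shown here are
// placeholders, not the ones used by this method.
//
//    double lr = fetchValue (block, "LearningRate", 1e-5);
//    bool   mt = fetchValue (block, "Multithreading", true);        // "TRUE"/"T"/"1"
//    std::vector<double> drop = fetchValue (block, "DropConfig",
//                                           std::vector<double>()); // e.g. "0.0+0.5+0.3"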
394 <<
"Will ignore negative events in training!" 409 auto itLayout = std::begin (
fLayout);
410 auto itLayoutEnd = std::end (
fLayout)-1;
411 for ( ; itLayout != itLayoutEnd; ++itLayout) {
414 fNet.
AddLayer(outputSize, EActivationFunction::kIdentity);
432 Log () <<
kWARNING <<
"For regression only SUMOFSQUARES is a valid " 433 <<
" neural net error function. Setting error function to " 434 <<
" SUMOFSQUARES now." <<
Endl;
446 Log () <<
kFatal <<
"MUTUALEXCLUSIVE not yet implemented." <<
Endl;
   for (auto& block : strategyKeyValues) {

                                          std::vector<Double_t>());

      if (regularization == "L1") {
      }
      else if (regularization == "L2") {

   std::vector<TString> titles = {"Error on training set", "Error on test set"};
      Log() << kFATAL << "OpenCL backend not yet supported." << Endl;

      TrainCpu<Double_t>();

      Log() << kINFO << "Using Standard Implementation.";

      std::vector<Pattern> trainPattern;
      std::vector<Pattern> testPattern;
      for (auto &event : eventCollectionTraining) {
         const std::vector<Float_t>& values = event->GetValues();
            double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
            trainPattern.push_back(Pattern (values.begin(),
                                            values.end(),
                                            outputValue,
                                            event->GetWeight()));
            trainPattern.back().addInput(1.0);

            const std::vector<Float_t>& targets = event->GetTargets ();
            trainPattern.push_back(Pattern(values.begin(),
                                           values.end(),
                                           targets.begin(),
                                           targets.end(),
                                           event->GetWeight ()));
            trainPattern.back ().addInput (1.0);

      for (auto &event : eventCollectionTesting) {
         const std::vector<Float_t>& values = event->GetValues();
            double outputValue = event->GetClass () == 0 ? 0.9 : 0.1;
            testPattern.push_back(Pattern (values.begin(),
                                           values.end(),
                                           outputValue,
                                           event->GetWeight()));
            testPattern.back().addInput(1.0);

            const std::vector<Float_t>& targets = event->GetTargets ();
            testPattern.push_back(Pattern(values.begin(),
                                          values.end(),
                                          targets.begin(),
                                          targets.end(),
                                          event->GetWeight ()));
            testPattern.back ().addInput (1.0);
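      // Editorial note (not in the original source): for classification the target is
      // encoded as a soft value, 0.9 for class 0 (usually signal) and 0.1 otherwise,
      // rather than hard 1/0 labels. The extra addInput(1.0) appends a constant input
      // of 1.0 to every pattern, commonly used as an explicit bias node in the
      // standard (shallow) NeuralNet implementation.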
      std::vector<double> weights;

         EActivationFunction f = fNet.GetLayer(i).GetActivationFunction();

         case EActivationFunction::kIdentity: g = EnumFunction::LINEAR;   break;
         case EActivationFunction::kRelu:     g = EnumFunction::RELU;     break;
         case EActivationFunction::kSigmoid:  g = EnumFunction::SIGMOID;  break;
         case EActivationFunction::kTanh:     g = EnumFunction::TANH;     break;
         case EActivationFunction::kSymmRelu: g = EnumFunction::SYMMRELU; break;
         case EActivationFunction::kSoftSign: g = EnumFunction::SOFTSIGN; break;
         case EActivationFunction::kGauss:    g = EnumFunction::GAUSS;    break;

      case EOutputFunction::kIdentity: h = ModeOutputValues::DIRECT;  break;
      case EOutputFunction::kSigmoid:  h = ModeOutputValues::SIGMOID; break;

      case ELossFunction::kMeanSquaredError:

      case EInitialization::kGauss:
                                std::back_inserter(weights));
      case EInitialization::kUniform:
                                std::back_inserter(weights));
                                std::back_inserter(weights));

      switch(s.regularization) {
         case ERegularization::kL1: r = EnumRegularization::L1; break;
         case ERegularization::kL2: r = EnumRegularization::L2; break;

                                       s.testInterval, s.weightDecay, r,
                                       s.momentum, 1, s.multithreading);
      std::shared_ptr<Settings> ptrSettings(settings);
      ptrSettings->setMonitoring (0);

            << "Training with learning rate = " << ptrSettings->learningRate ()
            << ", momentum = " << ptrSettings->momentum ()
            << ", repetitions = " << ptrSettings->repetitions ()

      ptrSettings->setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
                                      (idxSetting+1)*100.0/(fSettings.size ()));
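      // Worked example (editorial note, not in the original source): with two stacked
      // training settings, fSettings.size() == 2, so setting 0 reports its progress in
      // the range 0-50 % and setting 1 in the range 50-100 %.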
      const std::vector<double>& dropConfig = ptrSettings->dropFractions ();
      if (!dropConfig.empty ()) {

            << " drop repetitions = " << ptrSettings->dropRepetitions()

         for (auto f : dropConfig) {

                                ptrSettings->momentum(),
                                ptrSettings->repetitions());

      net.train(weights, trainPattern, testPattern, minimizer, *ptrSettings.get());

      size_t weightIndex = 0;

         for (Int_t j = 0; j < layerWeights.GetNcols(); j++) {
            for (Int_t i = 0; i < layerWeights.GetNrows(); i++) {
               layerWeights(i,j) = weights[weightIndex];

         for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
            layerBiases(i,0) = weights[weightIndex];

         for (Int_t i = 0; i < layerBiases.GetNrows(); i++) {
            layerBiases(i,0) = 0.0;
#ifdef DNNCUDA // Included only if DNNCUDA flag is set.

   Log() << kINFO << "Start of neural network training on GPU." << Endl;

   size_t trainingPhase = 1;

      auto testNet = net.CreateClone(settings.batchSize);

      Log() << kINFO << "Training phase " << trainingPhase << " of "
            << fTrainingSettings.size() << ":" << Endl;

                                  testNet.GetBatchSize(),

                                 settings.convergenceSteps,
                                 settings.testInterval);

      std::vector<TNet<TCuda<>>> nets{};
      std::vector<TBatch<TCuda<>>> batches{};
      nets.reserve(nThreads);
      for (size_t i = 0; i < nThreads; i++) {

         for (size_t j = 0; j < net.GetDepth(); j++)

            auto &masterLayer = net.GetLayer(j);
            auto &layer = nets.back().GetLayer(j);

                       masterLayer.GetWeights());

                       masterLayer.GetBiases());

      bool converged = false;
      size_t stepCount = 0;
      size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
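      // Editorial note (not in the original source): each epoch is split into
      // nTrainingSamples / batchSize mini-batches; e.g. 10000 training events with
      // BatchSize=30 give 333 mini-batches per epoch, consumed nThreads at a time in
      // the loop below by the lock-free ("Hogwild!"-style) minimizer described in the
      // help text.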
      std::chrono::time_point<std::chrono::system_clock> start, end;
      start = std::chrono::system_clock::now();

      Log() << std::setw(10) << "Epoch" << " | "
            << std::setw(12) << "Train Err."
            << std::setw(12) << "Test Err."
            << std::setw(12) << "GFLOP/s"
            << std::setw(12) << "Conv. Steps" << Endl;

         trainingData.Shuffle();
         for (size_t i = 0; i < batchesInEpoch; i += nThreads) {

            for (size_t j = 0; j < nThreads; j++) {
               batches.reserve(nThreads);
               batches.push_back(trainingData.GetBatch());

            if (settings.momentum > 0.0) {
               minimizer.StepMomentum(net, nets, batches, settings.momentum);

               minimizer.Step(net, nets, batches);

         if ((stepCount % minimizer.GetTestInterval()) == 0) {

            for (auto batch : testData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               testError += testNet.Loss(inputMatrix, outputMatrix);

            testError /= (Double_t) (nTestSamples / settings.batchSize);

            end = std::chrono::system_clock::now();

            for (auto batch : trainingData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               trainingError += net.Loss(inputMatrix, outputMatrix);

            trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);

            fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;

            std::chrono::duration<double> elapsed_seconds = end - start;
            double seconds = elapsed_seconds.count();
            double nFlops = (double) (settings.testInterval * batchesInEpoch);

            converged = minimizer.HasConverged(testError);
            start = std::chrono::system_clock::now();

            Log() << std::setw(10) << stepCount << " | "
                  << std::setw(12) << trainingError
                  << std::setw(12) << testError
                  << std::setw(12) << nFlops / seconds
                  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;

#else // DNNCUDA flag not set.

   Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                      "you have CUDA installed and it was successfully "
                      "detected by CMAKE." << Endl;
template<typename AFloat>

#ifdef DNNCPU // Included only if DNNCPU flag is set.

   Log() << kINFO << "Start of neural network training on CPU." << Endl << Endl;

   size_t trainingPhase = 1;

      Log() << "Training phase " << trainingPhase << " of "
            << fTrainingSettings.size() << ":" << Endl;

      auto testNet = net.CreateClone(settings.batchSize);

                                  testNet.GetBatchSize(),

                                 settings.convergenceSteps,
                                 settings.testInterval);

      std::vector<TNet<TCpu<AFloat>>> nets{};
      std::vector<TBatch<TCpu<AFloat>>> batches{};
      nets.reserve(nThreads);
      for (size_t i = 0; i < nThreads; i++) {

         for (size_t j = 0; j < net.GetDepth(); j++)

            auto &masterLayer = net.GetLayer(j);
            auto &layer = nets.back().GetLayer(j);

                       masterLayer.GetWeights());

                       masterLayer.GetBiases());

      bool converged = false;
      size_t stepCount = 0;
      size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();

      std::chrono::time_point<std::chrono::system_clock> start, end;
      start = std::chrono::system_clock::now();

      Log() << std::setw(10) << "Epoch" << " | "
            << std::setw(12) << "Train Err."
            << std::setw(12) << "Test Err."
            << std::setw(12) << "GFLOP/s"
            << std::setw(12) << "Conv. Steps" << Endl;

         trainingData.Shuffle();
         for (size_t i = 0; i < batchesInEpoch; i += nThreads) {

            for (size_t j = 0; j < nThreads; j++) {
               batches.reserve(nThreads);
               batches.push_back(trainingData.GetBatch());

            if (settings.momentum > 0.0) {
               minimizer.StepMomentum(net, nets, batches, settings.momentum);

               minimizer.Step(net, nets, batches);

         if ((stepCount % minimizer.GetTestInterval()) == 0) {

            AFloat testError = 0.0;
            for (auto batch : testData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               testError += testNet.Loss(inputMatrix, outputMatrix);

            testError /= (Double_t) (nTestSamples / settings.batchSize);

            end = std::chrono::system_clock::now();

            AFloat trainingError = 0.0;
            for (auto batch : trainingData) {
               auto inputMatrix  = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               trainingError += net.Loss(inputMatrix, outputMatrix);

            trainingError /= (Double_t) (nTrainingSamples / settings.batchSize);

            fIPyCurrentIter = 100*(double)minimizer.GetConvergenceCount() /(double)settings.convergenceSteps;

            std::chrono::duration<double> elapsed_seconds = end - start;
            double seconds = elapsed_seconds.count();
            double nFlops = (double) (settings.testInterval * batchesInEpoch);

            converged = minimizer.HasConverged(testError);
            start = std::chrono::system_clock::now();

            Log() << std::setw(10) << stepCount << " | "
                  << std::setw(12) << trainingError
                  << std::setw(12) << testError
                  << std::setw(12) << nFlops / seconds
                  << std::setw(12) << minimizer.GetConvergenceCount() << Endl;
#else // DNNCPU flag not set.

   Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
                      "you have a BLAS implementation and it was successfully "
                      "detected by CMake, as well as that the imt CMake flag is set." << Endl;
   for (size_t i = 0; i < nVariables; i++) {
      X(0,i) = inputValues[i];

   const std::vector<Float_t>& inputValues = ev->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X(0,i) = inputValues[i];

   size_t nTargets = std::max(1u, ev->GetNTargets());

   std::vector<Float_t> output(nTargets);

   for (size_t i = 0; i < nTargets; i++)
      output[i] = YHat(0, i);

   for (size_t i = 0; i < nTargets; ++i) {

   for (size_t i = 0; i < nTargets; ++i) {
   Log() << kFATAL << "ERROR: Multiclass classification not yet implemented." << Endl;

                                     gTools().StringFromInt(inputWidth));
   for (Int_t i = 0; i < depth; i++) {

      int activationFunction = static_cast<int>(layer.GetActivationFunction());

   size_t inputWidth, depth;

   char lossFunctionChar;

   char outputFunctionChar;

   size_t previousWidth = inputWidth;

   for (size_t i = 0; i < depth; i++) {

      EActivationFunction f;

      f = static_cast<EActivationFunction>(fString.Atoi());

      previousWidth = width;
   Log() << col << "--- Short description:" << colres << Endl;

   Log() << "The DNN neural network is a feedforward" << Endl;
   Log() << "multilayer perceptron implementation. The DNN has a user-" << Endl;
   Log() << "defined hidden layer architecture, where the number of input (output)" << Endl;
   Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
   Log() << "signal and one background, regression or multiclass). " << Endl;

   Log() << col << "--- Performance optimisation:" << colres << Endl;
   const char* txt =
      "The DNN supports various options to improve performance in terms of training speed and \n \
      reduction of overfitting: \n \
      - different training settings can be stacked. Such that the initial training \n \
      is done with a large learning rate and a large drop out fraction whilst \n \
      in a later stage learning rate and drop out can be reduced. \n \
      initial training stage: 0.0 for the first layer, 0.5 for later layers. \n \
      later training stage: 0.1 or 0.0 for all layers \n \
      final training stage: 0.0] \n \
      Drop out is a technique where at each training cycle a fraction of arbitrary \n \
      nodes is disabled. This reduces co-adaptation of weights and thus reduces overfitting. \n \
      - L1 and L2 regularization are available \n \
      [recommended 10 - 150] \n \
      Arbitrary mini-batch sizes can be chosen. \n \
      - Multithreading \n \
      [recommended: True] \n \
      Multithreading can be turned on. The minibatches are distributed to the available \n \
      cores. The algorithm is lock-free (\"Hogwild!\"-style) for each cycle. \n \
      - example: \"TANH|(N+30)*2,TANH|(N+30),LINEAR\" \n \
      . two hidden layers (separated by \",\") \n \
      . the activation function is TANH (other options: RELU, SOFTSIGN, LINEAR) \n \
      . the activation function for the output layer is LINEAR \n \
      . the first hidden layer has (N+30)*2 nodes where N is the number of input neurons \n \
      . the second hidden layer has N+30 nodes, where N is the number of input neurons \n \
      . the number of nodes in the output layer is determined by the number of output nodes \n \
      and can therefore not be chosen freely. \n \
      \"ErrorStrategy\": \n \
      The error of the neural net is determined by a sum-of-squares error function \n \
      For regression, this is the only possible choice. \n \
      The error of the neural net is determined by a cross entropy function. The \n \
      output values are automatically (internally) transformed into probabilities \n \
      using a sigmoid function. \n \
      For signal/background classification this is the default choice. \n \
      For multiclass using cross entropy more than one or no output classes \n \
      can be equally true or false (e.g. Event 0: A and B are true, Event 1: \n \
      A and C is true, Event 2: C is true, ...) \n \
      - MUTUALEXCLUSIVE \n \
      In multiclass settings, exactly one of the output classes can be true (e.g. either A or B or C) \n \
      \"WeightInitialization\" \n \
      \"Xavier Glorot & Yoshua Bengio\"-style of initializing the weights. The weights are chosen randomly \n \
      such that the variance of the values of the nodes is preserved for each layer. \n \
      - XAVIERUNIFORM \n \
      The same as XAVIER, but with uniformly distributed weights instead of gaussian weights \n \
      Random values scaled by the layer size \n \
      \"TrainingStrategy\" \n \
      - example: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5|LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFraction=0.0,DropRepetitions=5\" \n \
      - explanation: two stacked training settings separated by \"|\" \n \
      . first training setting: \"LearningRate=1e-1,Momentum=0.3,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,Renormalize=L2,DropConfig=0.0,DropRepetitions=5\" \n \
      . second training setting : \"LearningRate=1e-4,Momentum=0.3,ConvergenceSteps=50,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Renormalize=L2,DropFractions=0.0,DropRepetitions=5\" \n \
      . LearningRate : \n \
      - recommended for classification: 0.1 initially, 1e-4 later \n \
      - recommended for regression: 1e-4 and less \n \
      preserve a fraction of the momentum for the next training batch [fraction = 0.0 - 1.0] \n \
      . Repetitions : \n \
      train \"Repetitions\" repetitions with the same minibatch before switching to the next one \n \
      . ConvergenceSteps : \n \
      Assume that convergence is reached after \"ConvergenceSteps\" cycles where no improvement \n \
      of the error on the test samples has been found. (Mind that only at each \"TestRepetitions\" \n \
      cycle the test samples are evaluated and thus the convergence is checked) \n \
      Size of the mini-batches. \n \
      . TestRepetitions \n \
      Perform testing the neural net on the test samples each \"TestRepetitions\" cycle \n \
      If \"Renormalize\" is set to L1 or L2, \"WeightDecay\" provides the renormalization factor \n \
      NONE, L1 (|w|) or L2 (w^2) \n \
      Drop a fraction of arbitrary nodes of each of the layers according to the values given \n \
      in the DropConfig. \n \
      [example: DropConfig=0.0+0.5+0.3 \n \
      meaning: drop no nodes in layer 0 (input layer), half of the nodes in layer 1 and 30% of the nodes \n \
      recommended: leave all the nodes turned on for the input layer (layer 0) \n \
      turn off half of the nodes in later layers for the initial training; leave all nodes \n \
      turned on (0.0) in later training stages] \n \
      . DropRepetitions \n \
      Each \"DropRepetitions\" cycle the configuration of which nodes are dropped is changed \n \
      [recommended : 1] \n \
      . Multithreading \n \
      turn on multithreading [recommended: True] \n \
      ";