#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
MethodPyKeras::MethodPyKeras(const TString &jobName, const TString &methodTitle,
                             DataSetInfo &dsi, const TString &theOption)
   DeclareOptionRef(fTriesEarlyStopping, "TriesEarlyStopping",
                    "Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");
   DeclareOptionRef(fTensorBoard, "TensorBoard",
                    "Write a log during training to visualize and monitor the training performance with TensorBoard");
   DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
                    "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
                    "Specify as 100 to use exactly 100 events. (Default: 20%)");
UInt_t MethodPyKeras::GetNumValidationSamples()
{
   Int_t nValidationSamples = 0;
   UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();

   // Parse the ValidationSize option string
   if (fNumValidationString.EndsWith("%")) {
      // Relative specification, e.g. "20%"
      TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
      if (intValStr.IsFloat()) {
         Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
      } else {
         Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
               << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
      }
   } else if (fNumValidationString.IsFloat()) {
      Double_t valSizeAsDouble = fNumValidationString.Atof();
      if (valSizeAsDouble < 1.0) {
         // Relative specification, e.g. "0.2"
         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
      } else {
         // Absolute specification, e.g. "100"
         nValidationSamples = valSizeAsDouble;
      }
   } else {
      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
            << "\". Expected string like \"0.2\" or \"100\"." << Endl;
   }

   // Sanity checks on the requested validation size
   if (nValidationSamples < 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
   }
   if (nValidationSamples == 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
   }
   if (nValidationSamples >= (Int_t)trainingSetSize) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString
            << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
   }

   return nValidationSamples;
}
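// Example (illustration): with 10000 training events, ValidationSize=20% and ValidationSize=0.2
// both yield nValidationSamples = 2000, while ValidationSize=100 reserves exactly 100 events;
// values that are negative, zero, or as large as the training set abort with kFATAL above.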
   // SetupKerasModel(loadTrainedModel): load the Keras model file into the Python namespace
   TString filenameLoadModel;
   if (loadTrainedModel) {
      filenameLoadModel = fFilenameTrainedModel;
   }
   else {
      // ... (fall back to the filename of the initial, untrained model)
   }
   PyRunString("model = keras.models.load_model('"+filenameLoadModel+"')",
               "Failed to load Keras model from file: "+filenameLoadModel);
   Log() << kINFO << "Load model from file: " << filenameLoadModel << Endl;

   // Determine the number of network outputs from the analysis type
   if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) fNOutputs = DataInfo().GetNClasses();
   else if (GetAnalysisType() == Types::kRegression) fNOutputs = DataInfo().GetNTargets();
   else Log() << kFATAL << "Selected analysis type is not implemented" << Endl;
   // Per-event buffers used for prediction, exposed to Python as NumPy arrays that reference
   // the C++ memory directly (no copy)
   npy_intp dimsVals[2] = {(npy_intp)1, (npy_intp)fNVars};
   PyArrayObject* pVals = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsVals, NPY_FLOAT, (void*)fVals);

   npy_intp dimsOutput[2] = {(npy_intp)1, (npy_intp)fNOutputs};
   PyArrayObject* pOutput = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsOutput, NPY_FLOAT, (void*)&fOutput[0]);
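   // Note (general NumPy C-API behaviour, not specific to this file): PyArray_SimpleNewFromData
   // wraps the given buffer without copying it, so fVals and fOutput must outlive pVals/pOutput,
   // and import_array() must have been called before using the array API. Minimal standalone sketch:
   //
   //    #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
   //    #include <Python.h>
   //    #include <numpy/arrayobject.h>
   //
   //    static float buffer[4] = {1.f, 2.f, 3.f, 4.f};    // C++ owns this memory
   //    npy_intp dims[2] = {1, 4};
   //    PyArrayObject* view = (PyArrayObject*)
   //       PyArray_SimpleNewFromData(2, dims, NPY_FLOAT, (void*)buffer);   // no copy, just a view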
   // Init(): the Python interpreter must already be initialized, then Keras is imported
   if (!PyIsInitialized()) {
      Log() << kFATAL << "Python is not initialized" << Endl;
   }

   PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed");
   PyRunString("import keras", "Import Keras failed");
   // Train(): split the TMVA training sample into training and validation parts
   UInt_t nAllEvents = Data()->GetNTrainingEvents();
   UInt_t nValEvents = GetNumValidationSamples();
   UInt_t nTrainingEvents = nAllEvents - nValEvents;

   Log() << kINFO << "Split TMVA training data in " << nTrainingEvents << " training events and "
         << nValEvents << " validation events" << Endl;

   // Copy the training events into contiguous float buffers
   float* trainDataX = new float[nTrainingEvents*fNVars];
   float* trainDataY = new float[nTrainingEvents*fNOutputs];
   float* trainDataWeights = new float[nTrainingEvents];
   for (UInt_t i=0; i<nTrainingEvents; i++) {
      const TMVA::Event* e = GetTrainingEvent(i);
      // Fill variables
      for (UInt_t j=0; j<fNVars; j++) {
         trainDataX[j + i*fNVars] = e->GetValue(j);
      }
      // Fill targets
      if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) {
         // ... (one-hot encoding of the event class, elided)
      }
      else if (GetAnalysisType() == Types::kRegression) {
         for (UInt_t j=0; j<fNOutputs; j++) {
            trainDataY[j + i*fNOutputs] = e->GetTarget(j);
         }
      }
      else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;
      // Fill weights
      trainDataWeights[i] = e->GetWeight();
   }

   // Expose the buffers to Python as NumPy arrays and register them in the local namespace
   npy_intp dimsTrainX[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNVars};
   npy_intp dimsTrainY[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNOutputs};
   npy_intp dimsTrainWeights[1] = {(npy_intp)nTrainingEvents};
   PyArrayObject* pTrainDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainX, NPY_FLOAT, (void*)trainDataX);
   PyArrayObject* pTrainDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainY, NPY_FLOAT, (void*)trainDataY);
   PyArrayObject* pTrainDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsTrainWeights, NPY_FLOAT, (void*)trainDataWeights);
   // ... ("trainX" and "trainY" are registered in fLocalNS in the same way)
   PyDict_SetItemString(fLocalNS, "trainWeights", (PyObject*)pTrainDataWeights);
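   // Memory layout (illustration): trainDataX is a row-major nTrainingEvents x fNVars block, so
   // variable j of event i sits at trainDataX[j + i*fNVars]; wrapped with dimsTrainX it appears
   // in Keras as a 2D array with one row per event. trainDataY and trainDataWeights follow the
   // same convention.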
   // Copy the validation events (the events following the training block) into their own buffers
   float* valDataX = new float[nValEvents*fNVars];
   float* valDataY = new float[nValEvents*fNOutputs];
   float* valDataWeights = new float[nValEvents];

   for (UInt_t i=0; i<nValEvents; i++) {
      UInt_t ievt = nTrainingEvents + i;
      const TMVA::Event* e = GetTrainingEvent(ievt);
      for (UInt_t j=0; j<fNVars; j++) {
         valDataX[j + i*fNVars] = e->GetValue(j);
      }
      // Fill targets (same branching on the analysis type as for the training data above)
      if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) {
         // ...
      }
      else if (GetAnalysisType() == Types::kRegression) {
         // ...
      }
      else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl;
      valDataWeights[i] = e->GetWeight();
   }

   npy_intp dimsValX[2] = {(npy_intp)nValEvents, (npy_intp)fNVars};
   npy_intp dimsValY[2] = {(npy_intp)nValEvents, (npy_intp)fNOutputs};
   npy_intp dimsValWeights[1] = {(npy_intp)nValEvents};
   PyArrayObject* pValDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValX, NPY_FLOAT, (void*)valDataX);
   PyArrayObject* pValDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValY, NPY_FLOAT, (void*)valDataY);
   PyArrayObject* pValDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsValWeights, NPY_FLOAT, (void*)valDataWeights);
   // Register the training parameters in the local Python namespace
   // (pBatchSize, pNumEpochs and pVerbose are Python integers built from the corresponding options; construction elided)
   PyDict_SetItemString(fLocalNS, "batchSize", pBatchSize);
   PyDict_SetItemString(fLocalNS, "numEpochs", pNumEpochs);
   PyDict_SetItemString(fLocalNS, "verbose", pVerbose);

   // Set up the Keras training callbacks in a Python list named "callbacks" (list creation elided)

   // Callback: store only the model weights with the smallest validation loss (option SaveBestOnly)
   PyRunString("callbacks.append(keras.callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))",
               "Failed to setup training callback: SaveBestOnly");
   Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl;

   // Callback: stop training after fTriesEarlyStopping epochs without improvement of the validation loss
   TString tries;
   tries.Form("%i", fTriesEarlyStopping);
   PyRunString("callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))",
               "Failed to setup training callback: TriesEarlyStopping");
   Log() << kINFO << "Option TriesEarlyStopping: Training will stop after " << tries << " number of epochs with no improvement of validation loss" << Endl;

   // Callback: learning rate scheduler, driven by the LearningRateSchedule option
   PyRunString("strScheduleSteps = '"+fLearningRateSchedule+"'\n"
               "schedulerSteps = {}\n"
               "for c in strScheduleSteps.split(';'):\n"
               "    x = c.split(',')\n"
               "    schedulerSteps[int(x[0])] = float(x[1])\n",
               "Failed to setup steps for scheduler function from string: "+fLearningRateSchedule);
   PyRunString("def schedule(epoch, model=model, schedulerSteps=schedulerSteps):\n"
               "    if epoch in schedulerSteps: return float(schedulerSteps[epoch])\n"
               "    else: return float(model.optimizer.lr.get_value())\n",
               "Failed to setup scheduler function with string: "+fLearningRateSchedule);
   PyRunString("callbacks.append(keras.callbacks.LearningRateScheduler(schedule))",
               "Failed to setup training callback: LearningRateSchedule");
   // Callback: write TensorBoard logs to the directory given by the TensorBoard option
   TString logdir = TString("'") + fTensorBoard + TString("'");
   PyRunString("callbacks.append(keras.callbacks.TensorBoard(log_dir=" + logdir +
               ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
               "Failed to setup training callback: TensorBoard");
   Log() << kINFO << "Option TensorBoard: Log files for training monitoring are stored in: " << logdir << Endl;

   // Train the model
   PyRunString("history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",
               "Failed to train model");
   delete[] trainDataWeights;
   // ...
   delete[] valDataWeights;
   // GetMvaValue(): run the model on the single-event input buffer
   PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
               "Failed to get predictions");
   // GetMvaValues(firstEvt, lastEvt, logProgress): evaluate all requested events in a single batch
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt-firstEvt;

   Log() << kINFO << "Evaluation of " << GetMethodName() << " on "
         << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
         << " sample (" << nEvents << " events)" << Endl;

   // Copy the requested events into a contiguous float buffer
   float* data = new float[nEvents*fNVars];
   for (UInt_t i=0; i<nEvents; i++) {
      Data()->SetCurrentEvent(i);
      const TMVA::Event* e = GetEvent();
      for (UInt_t j=0; j<fNVars; j++) {
         data[j + i*fNVars] = e->GetValue(j);
      }
   }

   npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)fNVars};
   PyArrayObject* pDataMvaValues = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsData, NPY_FLOAT, (void*)data);
   if (pDataMvaValues==0) Log() << "Failed to load data to Python array" << Endl;

   // Fetch the model object from the local Python namespace and call its predict method directly
   PyObject* pModel = PyDict_GetItemString(fLocalNS, "model");
   if (pModel==0) Log() << kFATAL << "Failed to get model Python object" << Endl;
   PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallMethod(pModel, (char*)"predict", (char*)"O", pDataMvaValues);
   if (pPredictions==0) Log() << kFATAL << "Failed to get predictions" << Endl;

   // Copy the predictions into the return vector
   std::vector<double> mvaValues(nEvents);
   float* predictionsData = (float*) PyArray_DATA(pPredictions);
   for (UInt_t i=0; i<nEvents; i++) {
      // ... (the per-event classifier output is read from predictionsData, see the note below)
   }

   Log() << kINFO << "Elapsed time for evaluation of " << nEvents << " events: "
         << timer.GetElapsedTime() << Endl;   // timer is the TMVA::Timer started before the loop (elided)
   // GetRegressionValues(): same per-event prediction snippet as above
   PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
               "Failed to get predictions");

   // GetMulticlassValues(): same per-event prediction snippet as above
   PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
               "Failed to get predictions");
   // GetHelpMessage()
   Log() << "Keras is a high-level API for the Theano and Tensorflow packages." << Endl;
   Log() << "This method wraps the training and predictions steps of the Keras" << Endl;
   Log() << "Python package for TMVA, so that dataloading, preprocessing and" << Endl;
   Log() << "evaluation can be done within the TMVA system. To use this Keras" << Endl;
   Log() << "interface, you have to generate a model with Keras first. Then," << Endl;
   Log() << "this model can be loaded and trained in TMVA." << Endl;