7 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 8 #include <numpy/arrayobject.h> 26 fContinueTraining =
false;
28 fTriesEarlyStopping = -1;
29 fLearningRateSchedule =
"";
30 fFilenameTrainedModel =
"";
61 DeclareOptionRef(
fTriesEarlyStopping,
"TriesEarlyStopping",
"Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");
82 TString filenameLoadModel;
83 if (loadTrainedModel) {
89 PyRunString(
"model = keras.models.load_model('"+filenameLoadModel+
"')",
90 "Failed to load Keras model from file: "+filenameLoadModel);
91 Log() <<
kINFO <<
"Load model from file: " << filenameLoadModel <<
Endl;
101 else Log() <<
kFATAL <<
"Selected analysis type is not implemented" <<
Endl;
105 npy_intp dimsVals[2] = {(npy_intp)1, (npy_intp)
fNVars};
106 PyArrayObject* pVals = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsVals, NPY_FLOAT, (
void*)
fVals);
110 npy_intp dimsOutput[2] = {(npy_intp)1, (npy_intp)
fNOutputs};
111 PyArrayObject* pOutput = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsOutput, NPY_FLOAT, (
void*)&
fOutput[0]);
125 PyRunString(
"import keras",
"Import Keras failed");
140 float* trainDataX =
new float[nTrainingEvents*
fNVars];
141 float* trainDataY =
new float[nTrainingEvents*
fNOutputs];
142 float* trainDataWeights =
new float[nTrainingEvents];
143 for (
UInt_t i=0; i<nTrainingEvents; i++) {
163 else Log() <<
kFATAL <<
"Can not fill target vector because analysis type is not known" <<
Endl;
169 npy_intp dimsTrainX[2] = {(npy_intp)nTrainingEvents, (npy_intp)
fNVars};
170 npy_intp dimsTrainY[2] = {(npy_intp)nTrainingEvents, (npy_intp)
fNOutputs};
171 npy_intp dimsTrainWeights[1] = {(npy_intp)nTrainingEvents};
172 PyArrayObject* pTrainDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainX, NPY_FLOAT, (
void*)trainDataX);
173 PyArrayObject* pTrainDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainY, NPY_FLOAT, (
void*)trainDataY);
174 PyArrayObject* pTrainDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsTrainWeights, NPY_FLOAT, (
void*)trainDataWeights);
177 PyDict_SetItemString(
fLocalNS,
"trainWeights", (
PyObject*)pTrainDataWeights);
187 float* valDataX =
new float[nValEvents*
fNVars];
188 float* valDataY =
new float[nValEvents*
fNOutputs];
189 float* valDataWeights =
new float[nValEvents];
190 for (
UInt_t i=0; i<nValEvents; i++) {
208 else Log() <<
kFATAL <<
"Can not fill target vector because analysis type is not known" <<
Endl;
213 npy_intp dimsValX[2] = {(npy_intp)nValEvents, (npy_intp)
fNVars};
214 npy_intp dimsValY[2] = {(npy_intp)nValEvents, (npy_intp)
fNOutputs};
215 npy_intp dimsValWeights[1] = {(npy_intp)nValEvents};
216 PyArrayObject* pValDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValX, NPY_FLOAT, (
void*)valDataX);
217 PyArrayObject* pValDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValY, NPY_FLOAT, (
void*)valDataY);
218 PyArrayObject* pValDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsValWeights, NPY_FLOAT, (
void*)valDataWeights);
232 PyDict_SetItemString(
fLocalNS,
"batchSize", pBatchSize);
233 PyDict_SetItemString(
fLocalNS,
"numEpochs", pNumEpochs);
234 PyDict_SetItemString(
fLocalNS,
"verbose", pVerbose);
241 PyRunString(
"callbacks.append(keras.callbacks.ModelCheckpoint('"+
fFilenameTrainedModel+
"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))",
"Failed to setup training callback: SaveBestOnly");
242 Log() <<
kINFO <<
"Option SaveBestOnly: Only model weights with smallest validation loss will be stored" <<
Endl;
249 PyRunString(
"callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience="+tries+
", verbose=verbose, mode='auto'))",
"Failed to setup training callback: TriesEarlyStopping");
250 Log() <<
kINFO <<
"Option TriesEarlyStopping: Training will stop after " << tries <<
" number of epochs with no improvement of validation loss" <<
Endl;
257 "schedulerSteps = {}\n" 258 "for c in strScheduleSteps.split(';'):\n" 259 " x = c.split(',')\n" 260 " schedulerSteps[int(x[0])] = float(x[1])\n",
264 PyRunString(
"def schedule(epoch, model=model, schedulerSteps=schedulerSteps):\n" 265 " if epoch in schedulerSteps: return float(schedulerSteps[epoch])\n" 266 " else: return float(model.optimizer.lr.get_value())\n",
270 PyRunString(
"callbacks.append(keras.callbacks.LearningRateScheduler(schedule))",
271 "Failed to setup training callback: LearningRateSchedule");
276 PyRunString(
"history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, nb_epoch=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",
277 "Failed to train model");
296 delete[] trainDataWeights;
299 delete[] valDataWeights;
320 PyRunString(
"for i,p in enumerate(model.predict(vals)): output[i]=p\n",
321 "Failed to get predictions");
336 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt =
nEvents;
337 if (firstEvt < 0) firstEvt = 0;
338 nEvents = lastEvt-firstEvt;
349 npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)
fNVars};
350 PyArrayObject* pDataMvaValues = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsData, NPY_FLOAT, (
void*)
data);
351 if (pDataMvaValues==0)
Log() <<
"Failed to load data to Python array" <<
Endl;
355 if (pModel==0)
Log() <<
kFATAL <<
"Failed to get model Python object" <<
Endl;
356 PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallMethod(pModel, (
char*)
"predict", (
char*)
"O", pDataMvaValues);
357 if (pPredictions==0)
Log() <<
kFATAL <<
"Failed to get predictions" <<
Endl;
362 std::vector<double> mvaValues(nEvents);
363 float* predictionsData = (
float*) PyArray_DATA(pPredictions);
382 PyRunString(
"for i,p in enumerate(model.predict(vals)): output[i]=p\n",
383 "Failed to get predictions");
410 PyRunString(
"for i,p in enumerate(model.predict(vals)): output[i]=p\n",
411 "Failed to get predictions");
423 Log() <<
"Keras is a high-level API for the Theano and Tensorflow packages." <<
Endl;
424 Log() <<
"This method wraps the training and predictions steps of the Keras" <<
Endl;
425 Log() <<
"Python package for TMVA, so that dataloading, preprocessing and" <<
Endl;
426 Log() <<
"evaluation can be done within the TMVA system. To use this Keras" <<
Endl;
427 Log() <<
"interface, you have to generate a model with Keras first. Then," <<
Endl;
428 Log() <<
"this model can be loaded and trained in TMVA." <<
Endl;
Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper)
void SetCurrentEvent(Long64_t ievt) const
MsgLogger & Endl(MsgLogger &ml)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
UInt_t GetNClasses() const
static int PyIsInitialized()
const TString & GetWeightFileDir() const
std::vector< Float_t > & GetRegressionValues()
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=Py_single_input)
void GetHelpMessage() const
const Event * GetEvent() const
TString fFilenameTrainedModel
DataSetInfo & DataInfo() const
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not...
Long64_t GetNTrainingEvents() const
const Event * GetTrainingEvent(Long64_t ievt) const
TString fLearningRateSchedule
const Event * GetTestingEvent(Long64_t ievt) const
Float_t GetTarget(UInt_t itgt) const
UInt_t GetNTargets() const
const char * GetName() const
Int_t fTriesEarlyStopping
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
std::vector< Float_t > & GetMulticlassValues()
Long64_t GetNTestEvents() const
UInt_t GetNVariables() const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
void SetupKerasModel(Bool_t loadTrainedModel)
static PyObject * fLocalNS
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
get all the MVA values for the events of the current Data type
virtual void TestClassification()
initialization
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Types::EAnalysisType GetAnalysisType() const
MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
virtual void TestClassification()
initialization
std::vector< float > fOutput
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)