7#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
8#include <numpy/arrayobject.h>
25 PyGILState_STATE m_GILState;
28 PyGILRAII() : m_GILState(PyGILState_Ensure()) {}
29 ~PyGILRAII() { PyGILState_Release(m_GILState); }
88 DeclareOptionRef(
fTriesEarlyStopping,
"TriesEarlyStopping",
"Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");
91 "Write a log during training to visualize and monitor the training performance with TensorBoard");
93 "Write a log during training to visualize and monitor the training performance with TensorBoard");
96 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
97 "Specify as 100 to use exactly 100 events. (Default: 20%)");
112 Int_t nValidationSamples = 0;
117 if (fNumValidationString.EndsWith(
"%")) {
122 Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
123 nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
125 Log() << kFATAL <<
"Cannot parse number \"" << fNumValidationString
126 <<
"\". Expected string like \"20%\" or \"20.0%\"." <<
Endl;
128 }
else if (fNumValidationString.IsFloat()) {
129 Double_t valSizeAsDouble = fNumValidationString.Atof();
131 if (valSizeAsDouble < 1.0) {
133 nValidationSamples = GetEventCollection(
Types::kTraining).size() * valSizeAsDouble;
136 nValidationSamples = valSizeAsDouble;
139 Log() << kFATAL <<
"Cannot parse number \"" << fNumValidationString <<
"\". Expected string like \"0.2\" or \"100\"."
145 if (nValidationSamples < 0) {
146 Log() << kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is negative." <<
Endl;
149 if (nValidationSamples == 0) {
150 Log() << kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is zero." <<
Endl;
153 if (nValidationSamples >= (
Int_t)trainingSetSize) {
154 Log() << kFATAL <<
"Validation size \"" << fNumValidationString
155 <<
"\" is larger than or equal in size to training set (size=\"" << trainingSetSize <<
"\")." <<
Endl;
158 return nValidationSamples;
185 Log() << kINFO <<
"Setting up tf.keras" <<
Endl;
187 Log() << kINFO <<
"Setting up keras with " <<
gSystem->
Getenv(
"KERAS_BACKEND") <<
" backend" <<
Endl;
189 bool useTFBackend =
kFALSE;
190 bool kerasIsCompatible =
kTRUE;
191 bool kerasIsPresent =
kFALSE;
198 kerasIsPresent =
kTRUE;
199 if (kerasIsPresent) {
202 useTFBackend =
kTRUE;
204 PyRunString(
"keras_major_version = int(keras.__version__.split('.')[0])");
205 PyRunString(
"keras_minor_version = int(keras.__version__.split('.')[1])");
206 PyObject *pyKerasMajorVersion = PyDict_GetItemString(
fLocalNS,
"keras_major_version");
207 PyObject *pyKerasMinorVersion = PyDict_GetItemString(
fLocalNS,
"keras_minor_version");
208 int kerasMajorVersion = PyLong_AsLong(pyKerasMajorVersion);
209 int kerasMinorVersion = PyLong_AsLong(pyKerasMinorVersion);
210 Log() << kINFO <<
"Using Keras version " << kerasMajorVersion <<
"." << kerasMinorVersion <<
Endl;
211 kerasIsCompatible = (kerasMajorVersion >= 2 && kerasMinorVersion >= 3);
216 Log() << kINFO <<
"Keras is not found. Trying using tf.keras" <<
Endl;
225 if (ret ==
nullptr) {
226 Log() << kFATAL <<
"Importing TensorFlow failed" <<
Endl;
229 PyRunString(
"tf_major_version = int(tf.__version__.split('.')[0])");
232 int tfVersion = PyLong_AsLong(pyTfVersion);
233 Log() << kINFO <<
"Using TensorFlow version " << tfVersion <<
Endl;
237 Log() << kWARNING <<
"Using TensorFlow version 1.x which does not contain tf.keras - use then TensorFlow as Keras backend" <<
Endl;
240 if (!kerasIsPresent) {
241 Log() << kFATAL <<
"Keras is not present and not a suitable TensorFlow version is found " <<
Endl;
248 if (!kerasIsCompatible) {
249 Log() << kWARNING <<
"The Keras version is not compatible with TensorFlow 2. Use instead tf.keras" <<
Endl;
258 Log() << kINFO <<
"Use Keras version from TensorFlow : tf.keras" <<
Endl;
264 Log() << kINFO <<
"Use TensorFlow as Keras backend" <<
Endl;
266 PyRunString(
"from keras.backend import tensorflow_backend as K");
272 TString configProto = (tfVersion >= 2) ?
"tf.compat.v1.ConfigProto" :
"tf.ConfigProto";
273 TString session = (tfVersion >= 2) ?
"tf.compat.v1.Session" :
"tf.Session";
277 if (num_threads > 0) {
278 Log() << kINFO <<
"Setting the CPU number of threads = " << num_threads <<
Endl;
281 TString::Format(
"session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
282 configProto.
Data(), num_threads, num_threads));
291 for (
int item = 0; item < optlist->
GetEntries(); ++item) {
292 Log() << kINFO <<
"Applying GPU option: gpu_options." << optlist->
At(item)->
GetName() <<
Endl;
301 PyRunString(
"tf.compat.v1.keras.backend.set_session(sess)");
308 Log() << kWARNING <<
"Cannot set the given " <<
fNumThreads <<
" threads when not using tensorflow as backend"
311 Log() << kWARNING <<
"Cannot set the given GPU option " <<
fGpuOptions
312 <<
" when not using tensorflow as backend" <<
Endl;
320 Log() << kINFO <<
" Loading Keras Model " <<
Endl;
332 TString errmsg =
"Error executing the provided user code";
335 PyRunString(
"print('custom objects for loading model : ',load_model_custom_objects)");
340 if (loadTrainedModel) {
348 "', custom_objects=load_model_custom_objects)",
"Failed to load Keras model from file: " + filenameLoadModel);
350 Log() << kINFO <<
"Loaded model from file: " << filenameLoadModel <<
Endl;
361 else Log() << kFATAL <<
"Selected analysis type is not implemented" <<
Endl;
365 npy_intp dimsVals[2] = {(npy_intp)1, (npy_intp)
fNVars};
366 PyArrayObject* pVals = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsVals, NPY_FLOAT, (
void*)
fVals);
370 npy_intp dimsOutput[2] = {(npy_intp)1, (npy_intp)
fNOutputs};
371 PyArrayObject* pOutput = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsOutput, NPY_FLOAT, (
void*)&
fOutput[0]);
383 TMVA::Internal::PyGILRAII raii;
386 Log() << kFATAL <<
"Python is not initialized" <<
Endl;
391 PyRunString(
"import sys; sys.argv = ['']",
"Set sys.argv failed");
407 UInt_t nTrainingEvents = nAllEvents - nValEvents;
409 Log() << kINFO <<
"Split TMVA training data in " << nTrainingEvents <<
" training events and "
410 << nValEvents <<
" validation events" <<
Endl;
412 float* trainDataX =
new float[nTrainingEvents*
fNVars];
413 float* trainDataY =
new float[nTrainingEvents*
fNOutputs];
414 float* trainDataWeights =
new float[nTrainingEvents];
415 for (
UInt_t i=0; i<nTrainingEvents; i++) {
419 trainDataX[j + i*
fNVars] =
e->GetValue(j);
432 trainDataY[j + i*
fNOutputs] =
e->GetTarget(j);
435 else Log() << kFATAL <<
"Can not fill target vector because analysis type is not known" <<
Endl;
438 trainDataWeights[i] =
e->GetWeight();
441 npy_intp dimsTrainX[2] = {(npy_intp)nTrainingEvents, (npy_intp)
fNVars};
442 npy_intp dimsTrainY[2] = {(npy_intp)nTrainingEvents, (npy_intp)
fNOutputs};
443 npy_intp dimsTrainWeights[1] = {(npy_intp)nTrainingEvents};
444 PyArrayObject* pTrainDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainX, NPY_FLOAT, (
void*)trainDataX);
445 PyArrayObject* pTrainDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainY, NPY_FLOAT, (
void*)trainDataY);
446 PyArrayObject* pTrainDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsTrainWeights, NPY_FLOAT, (
void*)trainDataWeights);
449 PyDict_SetItemString(
fLocalNS,
"trainWeights", (
PyObject*)pTrainDataWeights);
459 float* valDataX =
new float[nValEvents*
fNVars];
460 float* valDataY =
new float[nValEvents*
fNOutputs];
461 float* valDataWeights =
new float[nValEvents];
463 for (
UInt_t i=0; i< nValEvents ; i++) {
464 UInt_t ievt = nTrainingEvents + i;
468 valDataX[j + i*
fNVars] =
e->GetValue(j);
482 else Log() << kFATAL <<
"Can not fill target vector because analysis type is not known" <<
Endl;
484 valDataWeights[i] =
e->GetWeight();
487 npy_intp dimsValX[2] = {(npy_intp)nValEvents, (npy_intp)
fNVars};
488 npy_intp dimsValY[2] = {(npy_intp)nValEvents, (npy_intp)
fNOutputs};
489 npy_intp dimsValWeights[1] = {(npy_intp)nValEvents};
490 PyArrayObject* pValDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValX, NPY_FLOAT, (
void*)valDataX);
491 PyArrayObject* pValDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValY, NPY_FLOAT, (
void*)valDataY);
492 PyArrayObject* pValDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsValWeights, NPY_FLOAT, (
void*)valDataWeights);
500 Log() << kINFO <<
"Training Model Summary" <<
Endl;
508 PyDict_SetItemString(
fLocalNS,
"batchSize", pBatchSize);
509 PyDict_SetItemString(
fLocalNS,
"numEpochs", pNumEpochs);
510 PyDict_SetItemString(
fLocalNS,
"verbose", pVerbose);
517 PyRunString(
"callbacks.append(" +
fKerasString +
".callbacks.ModelCheckpoint('"+
fFilenameTrainedModel+
"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))",
"Failed to setup training callback: SaveBestOnly");
518 Log() << kINFO <<
"Option SaveBestOnly: Only model weights with smallest validation loss will be stored" <<
Endl;
525 PyRunString(
"callbacks.append(" +
fKerasString +
".callbacks.EarlyStopping(monitor='val_loss', patience="+tries+
", verbose=verbose, mode='auto'))",
"Failed to setup training callback: TriesEarlyStopping");
526 Log() << kINFO <<
"Option TriesEarlyStopping: Training will stop after " << tries <<
" number of epochs with no improvement of validation loss" <<
Endl;
533 "schedulerSteps = {}\n"
534 "for c in strScheduleSteps.split(';'):\n"
535 " x = c.split(',')\n"
536 " schedulerSteps[int(x[0])] = float(x[1])\n",
540 PyRunString(
"def schedule(epoch, model=model, schedulerSteps=schedulerSteps):\n"
541 " if epoch in schedulerSteps: return float(schedulerSteps[epoch])\n"
542 " else: return float(model.optimizer.lr.get_value())\n",
547 "Failed to setup training callback: LearningRateSchedule");
555 "callbacks.append(" +
fKerasString +
".callbacks.TensorBoard(log_dir=" + logdir +
556 ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
557 "Failed to setup training callback: TensorBoard");
558 Log() << kINFO <<
"Option TensorBoard: Log files for training monitoring are stored in: " << logdir <<
Endl;
562 PyRunString(
"history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",
563 "Failed to train model");
566 std::vector<float> fHistory;
568 npy_intp dimsHistory[1] = { (npy_intp)
fNumEpochs};
569 PyArrayObject* pHistory = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsHistory, NPY_FLOAT, (
void*)&fHistory[0]);
574 PyRunString(
"number_of_keys=len(history.history.keys())");
576 int nkeys=PyLong_AsLong(PyNkeys);
577 for (iHis=0; iHis<nkeys; iHis++) {
583#if PY_MAJOR_VERSION < 3
589 PyObject* repr = PyObject_Repr(stra);
590 PyObject* str = PyUnicode_AsEncodedString(repr,
"utf-8",
"~E~");
594 Log() << kINFO <<
"Getting training history for item:" << iHis <<
" name = " <<
name <<
Endl;
597 for (
size_t i=0; i<fHistory.size(); i++)
620 delete[] trainDataWeights;
623 delete[] valDataWeights;
644 PyRunString(
"for i,p in enumerate(model.predict(vals)): output[i]=p\n",
645 "Failed to get predictions");
660 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
661 if (firstEvt < 0) firstEvt = 0;
662 nEvents = lastEvt-firstEvt;
671 <<
" sample (" << nEvents <<
" events)" <<
Endl;
673 float* data =
new float[nEvents*
fNVars];
674 for (
UInt_t i=0; i<nEvents; i++) {
678 data[j + i*
fNVars] =
e->GetValue(j);
682 npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)
fNVars};
683 PyArrayObject* pDataMvaValues = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsData, NPY_FLOAT, (
void*)data);
684 if (pDataMvaValues==0)
Log() <<
"Failed to load data to Python array" <<
Endl;
688 if (pModel==0)
Log() << kFATAL <<
"Failed to get model Python object" <<
Endl;
689 PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallMethod(pModel, (
char*)
"predict", (
char*)
"O", pDataMvaValues);
690 if (pPredictions==0)
Log() << kFATAL <<
"Failed to get predictions" <<
Endl;
695 std::vector<double> mvaValues(nEvents);
696 float* predictionsData = (
float*) PyArray_DATA(pPredictions);
697 for (
UInt_t i=0; i<nEvents; i++) {
703 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
722 PyRunString(
"for i,p in enumerate(model.predict(vals)): output[i]=p\n",
723 "Failed to get predictions");
750 PyRunString(
"for i,p in enumerate(model.predict(vals)): output[i]=p\n",
751 "Failed to get predictions");
763 Log() <<
"Keras is a high-level API for the Theano and Tensorflow packages." <<
Endl;
764 Log() <<
"This method wraps the training and predictions steps of the Keras" <<
Endl;
765 Log() <<
"Python package for TMVA, so that dataloading, preprocessing and" <<
Endl;
766 Log() <<
"evaluation can be done within the TMVA system. To use this Keras" <<
Endl;
767 Log() <<
"interface, you have to generate a model with Keras first. Then," <<
Endl;
768 Log() <<
"this model can be loaded and trained in TMVA." <<
Endl;
779 PyRunString(
"keras_backend_is_set = keras.backend.backend() == \"tensorflow\"");
780 PyObject * keras_backend = PyDict_GetItemString(
fLocalNS,
"keras_backend_is_set");
781 if (keras_backend !=
nullptr && keras_backend == Py_True)
784 PyRunString(
"keras_backend_is_set = keras.backend.backend() == \"theano\"");
785 keras_backend = PyDict_GetItemString(
fLocalNS,
"keras_backend_is_set");
786 if (keras_backend !=
nullptr && keras_backend == Py_True)
789 PyRunString(
"keras_backend_is_set = keras.backend.backend() == \"cntk\"");
790 keras_backend = PyDict_GetItemString(
fLocalNS,
"keras_backend_is_set");
791 if (keras_backend !=
nullptr && keras_backend == Py_True)
#define REGISTER_METHOD(CLASS)
for example
char * Form(const char *fmt,...)
R__EXTERN TSystem * gSystem
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Class that contains all the data information.
UInt_t GetNClasses() const
UInt_t GetNTargets() const
Types::ETreeType GetCurrentType() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Long64_t GetNTrainingEvents() const
void SetCurrentEvent(Long64_t ievt) const
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Float_t GetTarget(UInt_t itgt) const
const char * GetName() const
Types::EAnalysisType GetAnalysisType() const
const TString & GetWeightFileDir() const
const TString & GetMethodName() const
const Event * GetEvent() const
DataSetInfo & DataInfo() const
virtual void TestClassification()
initialization
UInt_t GetNVariables() const
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
TrainingHistory fTrainHistory
const Event * GetTrainingEvent(Long64_t ievt) const
void GetHelpMessage() const
void Init()
Initialization function called from MethodBase::SetupMethod(). Note that option strings are not yet fil...
std::vector< float > fOutput
virtual void TestClassification()
initialization
void ProcessOptions()
Function processing the options. This is called only when creating the method before training, not when...
Bool_t UseTFKeras() const
Int_t fTriesEarlyStopping
EBackendType
enumeration defining the used Keras backend
void SetupKerasModel(Bool_t loadTrainedModel)
std::vector< Float_t > & GetMulticlassValues()
UInt_t GetNumValidationSamples()
Validation of the ValidationSize option.
Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper)
std::vector< Float_t > & GetRegressionValues()
TString fNumValidationString
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
TString GetKerasBackendName()
MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
TString fLearningRateSchedule
EBackendType GetKerasBackend()
Get the Keras backend (can be: TensorFlow, Theano or CNTK)
TString fFilenameTrainedModel
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
get all the MVA values for the events of the current Data type
static int PyIsInitialized()
Check Python interpreter initialization status.
static PyObject * fGlobalNS
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=Py_single_input)
Execute Python code from string.
Timing information for training and evaluation of MVA methods.
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
Int_t GetEntries() const
Return the number of objects in array (i.e. number of non-empty slots).
TObject * At(Int_t idx) const
virtual const char * GetName() const
Returns name of object.
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
const char * Data() const
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
virtual const char * Getenv(const char *env)
Get environment variable.
create variable transformations
MsgLogger & Endl(MsgLogger &ml)