7#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
8#include <numpy/arrayobject.h>
91 DeclareOptionRef(
fTriesEarlyStopping,
"TriesEarlyStopping",
"Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");
94 "Write a log during training to visualize and monitor the training performance with TensorBoard");
97 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
98 "Specify as 100 to use exactly 100 events. (Default: 20%)");
100 "Optional python code provided by the user to be executed before loading the Keras model");
118 if (fNumValidationString.EndsWith(
"%")) {
126 Log() << kFATAL <<
"Cannot parse number \"" << fNumValidationString
127 <<
"\". Expected string like \"20%\" or \"20.0%\"." <<
Endl;
129 }
else if (fNumValidationString.IsFloat()) {
140 Log() << kFATAL <<
"Cannot parse number \"" << fNumValidationString <<
"\". Expected string like \"0.2\" or \"100\"."
147 Log() << kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is negative." <<
Endl;
151 Log() << kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is zero." <<
Endl;
155 Log() << kFATAL <<
"Validation size \"" << fNumValidationString
156 <<
"\" is larger than or equal in size to training set (size=\"" <<
trainingSetSize <<
"\")." <<
Endl;
187 Log() << kINFO <<
"Setting up tf.keras" <<
Endl;
189 Log() << kINFO <<
"Setting up keras with " <<
gSystem->
Getenv(
"KERAS_BACKEND") <<
" backend" <<
Endl;
206 PyRunString(
"keras_major_version = int(keras.__version__.split('.')[0])");
207 PyRunString(
"keras_minor_version = int(keras.__version__.split('.')[1])");
222 Log() << kINFO <<
"Keras is not found. Trying using tf.keras" <<
Endl;
231 if (
ret ==
nullptr) {
232 Log() << kFATAL <<
"Importing TensorFlow failed" <<
Endl;
235 PyRunString(
"tf_major_version = int(tf.__version__.split('.')[0])");
242 Log() << kWARNING <<
"Using TensorFlow version 1.x which does not contain tf.keras - use then TensorFlow as Keras backend" <<
Endl;
246 Log() << kFATAL <<
"Keras is not present and not a suitable TensorFlow version is found " <<
Endl;
254 Log() << kWARNING <<
"The Keras version is not compatible with TensorFlow 2. Use instead tf.keras" <<
Endl;
263 Log() << kINFO <<
"Use Keras version from TensorFlow : tf.keras" <<
Endl;
269 Log() << kINFO <<
"Use TensorFlow as Keras backend" <<
Endl;
271 PyRunString(
"from keras.backend import tensorflow_backend as K");
286 TString::Format(
"session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
296 for (
int item = 0; item <
optlist->GetEntries(); ++item) {
297 Log() << kINFO <<
"Applying GPU option: gpu_options." <<
optlist->At(item)->GetName() <<
Endl;
306 PyRunString(
"tf.compat.v1.keras.backend.set_session(sess)");
313 Log() << kWARNING <<
"Cannot set the given " <<
fNumThreads <<
" threads when not using tensorflow as backend"
316 Log() << kWARNING <<
"Cannot set the given GPU option " <<
fGpuOptions
317 <<
" when not using tensorflow as backend" <<
Endl;
328 Log() << kINFO <<
" Loading Keras Model " <<
Endl;
343 PyRunString(
"print('custom objects for loading model : ',load_model_custom_objects)");
356 "', custom_objects=load_model_custom_objects)",
"Failed to load Keras model from file: " +
filenameLoadModel);
369 else Log() << kFATAL <<
"Selected analysis type is not implemented" <<
Endl;
386 PyRunString(
"tf.compat.v1.disable_eager_execution()",
"Failed to disable eager execution");
387 Log() << kINFO <<
"Disabled TF eager execution when evaluating model " <<
Endl;
419 Log() << kFATAL <<
"Python is not initialized" <<
Endl;
424 PyRunString(
"import sys; sys.argv = ['']",
"Set sys.argv failed");
443 Log() << kINFO <<
"Split TMVA training data in " <<
nTrainingEvents <<
" training events and "
469 else Log() << kFATAL <<
"Can not fill target vector because analysis type is not known" <<
Endl;
516 else Log() << kFATAL <<
"Can not fill target vector because analysis type is not known" <<
Endl;
534 Log() << kINFO <<
"Training Model Summary" <<
Endl;
551 PyRunString(
"callbacks.append(" +
fKerasString +
".callbacks.ModelCheckpoint('"+
fFilenameTrainedModel+
"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))",
"Failed to setup training callback: SaveBestOnly");
552 Log() << kINFO <<
"Option SaveBestOnly: Only model weights with smallest validation loss will be stored" <<
Endl;
559 PyRunString(
"callbacks.append(" +
fKerasString +
".callbacks.EarlyStopping(monitor='val_loss', patience="+
tries+
", verbose=verbose, mode='auto'))",
"Failed to setup training callback: TriesEarlyStopping");
560 Log() << kINFO <<
"Option TriesEarlyStopping: Training will stop after " <<
tries <<
" number of epochs with no improvement of validation loss" <<
Endl;
567 "schedulerSteps = {}\n"
568 "for c in strScheduleSteps.split(';'):\n"
569 " x = c.split(',')\n"
570 " schedulerSteps[int(x[0])] = float(x[1])\n",
574 PyRunString(
"def schedule(epoch, model=model, schedulerSteps=schedulerSteps):\n"
575 " if epoch in schedulerSteps: return float(schedulerSteps[epoch])\n"
576 " else: return float(model.optimizer.lr.get_value())\n",
581 "Failed to setup training callback: LearningRateSchedule");
590 ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
591 "Failed to setup training callback: TensorBoard");
592 Log() << kINFO <<
"Option TensorBoard: Log files for training monitoring are stored in: " <<
logdir <<
Endl;
596 PyRunString(
"history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",
597 "Failed to train model");
600 std::vector<float> fHistory;
608 PyRunString(
"number_of_keys=len(history.history.keys())");
621 Log() << kINFO <<
"Getting training history for item:" <<
iHis <<
" name = " <<
name <<
Endl;
624 for (
size_t i=0; i<fHistory.size(); i++)
673 +
")): output[i]=p\n";
700 <<
" sample (" << nEvents <<
" events)" <<
Endl;
703 for (
UInt_t i=0; i<nEvents; i++) {
711 std::vector<double> mvaValues(nEvents);
718 if (
pModel==0)
Log() << kFATAL <<
"Failed to get model Python object" <<
Endl;
726 for (
UInt_t i=0; i<nEvents; i++) {
732 <<
"Elapsed time for evaluation of " << nEvents <<
" events: "
733 <<
timer.GetElapsedTime() <<
" " <<
Endl;
755 +
")): output[i]=p\n";
785 +
")): output[i]=p\n";
798 Log() <<
"Keras is a high-level API for the Theano and Tensorflow packages." <<
Endl;
799 Log() <<
"This method wraps the training and predictions steps of the Keras" <<
Endl;
800 Log() <<
"Python package for TMVA, so that dataloading, preprocessing and" <<
Endl;
801 Log() <<
"evaluation can be done within the TMVA system. To use this Keras" <<
Endl;
802 Log() <<
"interface, you have to generate a model with Keras first. Then," <<
Endl;
803 Log() <<
"this model can be loaded and trained in TMVA." <<
Endl;
814 PyRunString(
"keras_backend_is_set = keras.backend.backend() == \"tensorflow\"");
819 PyRunString(
"keras_backend_is_set = keras.backend.backend() == \"theano\"");
824 PyRunString(
"keras_backend_is_set = keras.backend.backend() == \"cntk\"");
#define REGISTER_METHOD(CLASS)
for example
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
R__EXTERN TSystem * gSystem
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Class that contains all the data information.
UInt_t GetNClasses() const
UInt_t GetNTargets() const
Types::ETreeType GetCurrentType() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Long64_t GetNTrainingEvents() const
void SetCurrentEvent(Long64_t ievt) const
PyGILState_STATE m_GILState
const char * GetName() const
Types::EAnalysisType GetAnalysisType() const
const TString & GetWeightFileDir() const
const TString & GetMethodName() const
const Event * GetEvent() const
DataSetInfo & DataInfo() const
virtual void TestClassification()
initialization
UInt_t GetNVariables() const
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
TrainingHistory fTrainHistory
const Event * GetTrainingEvent(Long64_t ievt) const
void GetHelpMessage() const
void Init()
Initialization function called from MethodBase::SetupMethod(). Note that the option strings are not yet fil...
std::vector< float > fOutput
virtual void TestClassification()
initialization
void ProcessOptions()
Function processing the options. This is called only when creating the method before training, not when...
Bool_t UseTFKeras() const
Int_t fTriesEarlyStopping
EBackendType
enumeration defining the used Keras backend
void SetupKerasModel(Bool_t loadTrainedModel)
std::vector< Float_t > & GetMulticlassValues()
UInt_t GetNumValidationSamples()
Validation of the ValidationSize option.
Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper)
void SetupKerasModelForEval()
Setting up the model for evaluation. Add here any needed optimizations, such as disabling eager execution.
std::vector< Float_t > & GetRegressionValues()
bool fModelIsSetupForEval
TString fNumValidationString
std::vector< float > fVals
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
TString GetKerasBackendName()
MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
TString fLearningRateSchedule
EBackendType GetKerasBackend()
Get the Keras backend (can be: TensorFlow, Theano or CNTK)
TString fFilenameTrainedModel
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Get all the MVA values for the events of the current Data type.
static int PyIsInitialized()
Check Python interpreter initialization status.
static PyObject * fGlobalNS
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=256)
Execute Python code from string.
Timing information for training and evaluation of MVA methods.
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
@ kSignal
Never change this number: it is assumed elsewhere to be zero!
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
virtual const char * Getenv(const char *env)
Get environment variable.
std::string ToString(const T &val)
Utility function for conversion to strings.
create variable transformations
MsgLogger & Endl(MsgLogger &ml)