7#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
8#include <numpy/arrayobject.h>
45 fContinueTraining =
false;
47 fTriesEarlyStopping = -1;
48 fLearningRateSchedule =
"";
49 fFilenameTrainedModel =
"";
93 DeclareOptionRef(
fTriesEarlyStopping,
"TriesEarlyStopping",
"Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");
96 "Write a log during training to visualize and monitor the training performance with TensorBoard");
99 "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
100 "Specify as 100 to use exactly 100 events. (Default: 20%)");
102 "Optional python code provided by the user to be executed before loading the Keras model");
120 if (fNumValidationString.EndsWith(
"%")) {
128 Log() << kFATAL <<
"Cannot parse number \"" << fNumValidationString
129 <<
"\". Expected string like \"20%\" or \"20.0%\"." <<
Endl;
131 }
else if (fNumValidationString.IsFloat()) {
142 Log() << kFATAL <<
"Cannot parse number \"" << fNumValidationString <<
"\". Expected string like \"0.2\" or \"100\"."
149 Log() << kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is negative." <<
Endl;
153 Log() << kFATAL <<
"Validation size \"" << fNumValidationString <<
"\" is zero." <<
Endl;
157 Log() << kFATAL <<
"Validation size \"" << fNumValidationString
158 <<
"\" is larger than or equal in size to training set (size=\"" <<
trainingSetSize <<
"\")." <<
Endl;
193 Log() << kINFO <<
"Setting up tf.keras" <<
Endl;
195 Log() << kINFO <<
"Setting up keras with " <<
gSystem->
Getenv(
"KERAS_BACKEND") <<
" backend" <<
Endl;
212 PyRunString(
"keras_major_version = int(keras.__version__.split('.')[0])");
213 PyRunString(
"keras_minor_version = int(keras.__version__.split('.')[1])");
228 Log() << kINFO <<
"Keras is not found. Trying using tf.keras" <<
Endl;
237 if (
ret ==
nullptr) {
238 Log() << kFATAL <<
"Importing TensorFlow failed" <<
Endl;
241 PyRunString(
"tf_major_version = int(tf.__version__.split('.')[0])");
242 PyRunString(
"tf_minor_version = int(tf.__version__.split('.')[1])");
251 Log() << kWARNING <<
"Using TensorFlow version 1.x which does not contain tf.keras - use then TensorFlow as Keras backend" <<
Endl;
255 Log() << kFATAL <<
"Keras is not present and not a suitable TensorFlow version is found " <<
Endl;
263 Log() << kWARNING <<
"The Keras version is not compatible with TensorFlow 2. Use instead tf.keras" <<
Endl;
270 Log() << kFATAL <<
"Cannot use .h5 files with new Keras 3 API. Use .keras" <<
Endl;
278 Log() << kINFO <<
"Use Keras version from TensorFlow : tf.keras" <<
Endl;
284 Log() << kINFO <<
"Use TensorFlow as Keras backend" <<
Endl;
286 PyRunString(
"from keras.backend import tensorflow_backend as K");
302 TString::Format(
"session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
313 Log() << kINFO <<
"Applying GPU option: gpu_options." <<
optlist->At(
item)->GetName() <<
Endl;
322 PyRunString(
"tf.compat.v1.keras.backend.set_session(sess)");
336 Log() << kINFO <<
"Applying GPU option: allow_growth=True " <<
Endl;
338 PyRunString(
"physical_devices = tf.config.list_physical_devices('GPU')");
339 PyRunString(
"tf.config.experimental.set_memory_growth(physical_devices[0], True)");
349 Log() << kWARNING <<
"Cannot set the given " <<
fNumThreads <<
" threads when not using tensorflow as backend"
352 Log() << kWARNING <<
"Cannot set the given GPU option " <<
fGpuOptions
353 <<
" when not using tensorflow as backend" <<
Endl;
364 Log() << kINFO <<
" Loading Keras Model " <<
Endl;
379 PyRunString(
"print('custom objects for loading model : ',load_model_custom_objects)");
392 "', custom_objects=load_model_custom_objects)",
"Failed to load Keras model from file: " +
filenameLoadModel);
405 else Log() << kFATAL <<
"Selected analysis type is not implemented" <<
Endl;
422 PyRunString(
"tf.compat.v1.disable_eager_execution()",
"Failed to disable eager execution");
423 Log() << kINFO <<
"Disabled TF eager execution when evaluating model " <<
Endl;
441 Log() << kFATAL <<
"Python is not initialized" <<
Endl;
446 PyRunString(
"import sys; sys.argv = ['']",
"Set sys.argv failed");
465 Log() << kINFO <<
"Split TMVA training data in " <<
nTrainingEvents <<
" training events and "
491 else Log() << kFATAL <<
"Can not fill target vector because analysis type is not known" <<
Endl;
538 else Log() << kFATAL <<
"Can not fill target vector because analysis type is not known" <<
Endl;
556 Log() << kINFO <<
"Training Model Summary" <<
Endl;
573 PyRunString(
"callbacks.append(" +
fKerasString +
".callbacks.ModelCheckpoint('"+
fFilenameTrainedModel+
"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))",
"Failed to setup training callback: SaveBestOnly");
574 Log() << kINFO <<
"Option SaveBestOnly: Only model weights with smallest validation loss will be stored" <<
Endl;
581 PyRunString(
"callbacks.append(" +
fKerasString +
".callbacks.EarlyStopping(monitor='val_loss', patience="+
tries+
", verbose=verbose, mode='auto'))",
"Failed to setup training callback: TriesEarlyStopping");
582 Log() << kINFO <<
"Option TriesEarlyStopping: Training will stop after " <<
tries <<
" number of epochs with no improvement of validation loss" <<
Endl;
588 std::vector<std::pair<std::string, std::string>>
scheduleSteps;
593 if (!
x ||
x->GetEntries() != 2) {
594 Log() << kFATAL <<
"Invalid values given in LearningRateSchedule, it should be as \"10,0.1;20,0.01\""
598 std::string((*
x)[1]->
GetName() ) ) );
599 std::cout <<
" add learning rate schedule " <<
scheduleSteps.back().first <<
" : " <<
scheduleSteps.back().second << std::endl;
618 " for e in epochs:\n"
619 " if (epoch < e) :\n"
620 " return lrValues[i]\n"
627 "Failed to setup training callback: LearningRateSchedule");
636 ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
637 "Failed to setup training callback: TensorBoard");
638 Log() << kINFO <<
"Option TensorBoard: Log files for training monitoring are stored in: " <<
logdir <<
Endl;
642 PyRunString(
"history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",
643 "Failed to train model");
646 std::vector<float> fHistory;
654 PyRunString(
"number_of_keys=len(history.history.keys())");
667 Log() << kINFO <<
"Getting training history for item:" <<
iHis <<
" name = " <<
name <<
Endl;
670 for (
size_t i=0; i<fHistory.size(); i++)
705 size_t inputSize =
fNVars*nEvents;
710 fVals.resize(inputSize);
715 Log() << kFATAL <<
"Failed to load data to Python array" <<
Endl;
727 Log() << kFATAL <<
"Failed to create output data Python array" <<
Endl;
751 +
")): output[i]=p\n";
775 for (
UInt_t i=0; i<nEvents; i++) {
783 std::vector<double> mvaValues(nEvents);
787 if (
pModel==0)
Log() << kFATAL <<
"Failed to get model Python object" <<
Endl;
796 for (
UInt_t i=0; i<nEvents; i++) {
820 +
")): output[i]=p\n";
852 for (
UInt_t i=0; i<nEvents; i++) {
862 if (
pModel==0)
Log() << kFATAL <<
"Failed to get model Python object" <<
Endl;
901 +
")): output[i]=p\n";
919 for (
UInt_t i=0; i<nEvents; i++) {
929 if (
pModel==0)
Log() << kFATAL <<
"Failed to get model Python object" <<
Endl;
948 Log() <<
"Keras is a high-level API for the Theano and Tensorflow packages." <<
Endl;
949 Log() <<
"This method wraps the training and predictions steps of the Keras" <<
Endl;
950 Log() <<
"Python package for TMVA, so that dataloading, preprocessing and" <<
Endl;
951 Log() <<
"evaluation can be done within the TMVA system. To use this Keras" <<
Endl;
952 Log() <<
"interface, you have to generate a model with Keras first. Then," <<
Endl;
953 Log() <<
"this model can be loaded and trained in TMVA." <<
Endl;
964 PyRunString(
"keras_backend_is_set = keras.backend.backend() == \"tensorflow\"");
969 PyRunString(
"keras_backend_is_set = keras.backend.backend() == \"theano\"");
974 PyRunString(
"keras_backend_is_set = keras.backend.backend() == \"cntk\"");
#define REGISTER_METHOD(CLASS)
for example
long long Long64_t
Portable signed long integer 8 bytes.
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
R__EXTERN TSystem * gSystem
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Class that contains all the data information.
UInt_t GetNClasses() const
UInt_t GetNTargets() const
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Long64_t GetNTrainingEvents() const
void SetCurrentEvent(Long64_t ievt) const
PyGILState_STATE m_GILState
const char * GetName() const override
Types::EAnalysisType GetAnalysisType() const
const TString & GetWeightFileDir() const
const Event * GetEvent() const
DataSetInfo & DataInfo() const
virtual void TestClassification()
initialization
UInt_t GetNVariables() const
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
TrainingHistory fTrainHistory
const Event * GetTrainingEvent(Long64_t ievt) const
std::vector< Float_t > GetAllRegressionValues() override
Get all regression values in one call.
void GetHelpMessage() const override
void ProcessOptions() override
Function processing the options. This is called only when creating the method before training, not when...
std::vector< float > fOutput
std::vector< Float_t > & GetRegressionValues() override
Bool_t UseTFKeras() const
Int_t fTriesEarlyStopping
EBackendType
enumeration defining the used Keras backend
void SetupKerasModel(Bool_t loadTrainedModel)
Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper0) override
void DeclareOptions() override
UInt_t GetNumValidationSamples()
Validation of the ValidationSize option.
void TestClassification() override
initialization
void SetupKerasModelForEval()
Setting up model for evaluation. Add here some needed optimizations, like disabling eager execution.
std::vector< Float_t > GetAllMulticlassValues() override
Get all multi-class values.
bool fModelIsSetupForEval
TString fNumValidationString
std::vector< float > fVals
std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) override
get all the MVA values for the events of the current Data type
TString GetKerasBackendName()
MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
TString fLearningRateSchedule
std::vector< Float_t > & GetMulticlassValues() override
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t) override
void InitEvaluation(size_t nEvents)
void Init() override
Initialization function called from MethodBase::SetupMethod(). Note that option strings are not yet fil...
void ReadModelFromFile() override
EBackendType GetKerasBackend()
Get the Keras backend (can be: TensorFlow, Theano or CNTK)
TString fFilenameTrainedModel
Virtual base class for all TMVA method based on Python.
static int PyIsInitialized()
Check Python interpreter initialization status.
static PyObject * fGlobalNS
void PyRunString(TString code, TString errorMessage="Failed to run python code", int start=256)
Execute Python code from string.
void AddValue(TString Property, Int_t stage, Double_t value)
Singleton class for Global types used by TMVA.
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
TObjArray * Tokenize(const TString &delim) const
This function is used to isolate sequential tokens in a TString.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
virtual const char * Getenv(const char *env)
Get environment variable.
std::string ToString(const T &val)
Utility function for conversion to strings.
create variable transformations
MsgLogger & Endl(MsgLogger &ml)