34void TMVA_Higgs_Classification() {
38 bool useLikelihood =
true;
39 bool useLikelihoodKDE =
false;
40 bool useFischer =
true;
45 bool usePyTorch =
true;
50 gSystem->Setenv(
"KERAS_BACKEND",
"tensorflow");
58 auto outputFile =
TFile::Open(
"Higgs_ClassificationOutput.root",
"RECREATE");
60 TMVA::Factory factory(
"TMVA_Higgs_Classification", outputFile,
61 "!V:ROC:!Silent:Color:AnalysisType=Classification" );
71 TString inputFileName =
gROOT->GetTutorialDir() +
"/machine_learning/data/Higgs_data.root";
73 TFile *inputFile =
nullptr;
75 if (!
gSystem->AccessPathName(inputFileName)) {
80 Error(
"TMVA_Higgs_Classification",
"Input file is not found - exit");
139 "nTrain_Signal=7000:nTrain_Background=7000:SplitMode=Random:NormMode=NumEvents:!V" );
153 "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
156if (useLikelihoodKDE) {
158 "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );
164 factory.BookMethod(loader,
TMVA::Types::kFisher,
"Fisher",
"H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
170 "!V:NTrees=200:MinNodeSize=2.5%:MaxDepth=2:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
176 "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
246 bool useDLGPU =
false;
252 TString inputLayoutString =
"InputLayout=1|1|7";
253 TString batchLayoutString=
"BatchLayout=1|128|7";
254 TString layoutString (
"Layout=DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|1|LINEAR");
257 TString training1(
"LearningRate=1e-3,Momentum=0.9,"
258 "ConvergenceSteps=10,BatchSize=128,TestRepetitions=1,"
259 "MaxEpochs=30,WeightDecay=1e-4,Regularization=None,"
260 "Optimizer=ADAM,ADAM_beta1=0.9,ADAM_beta2=0.999,ADAM_eps=1.E-7,"
261 "DropConfig=0.0+0.0+0.0+0.");
267 TString trainingStrategyString (
"TrainingStrategy=");
268 trainingStrategyString += training1;
272 TString dnnOptions (
"!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:"
273 "WeightInitialization=XAVIER");
274 dnnOptions.Append (
":"); dnnOptions.Append (inputLayoutString);
275 dnnOptions.Append (
":"); dnnOptions.Append (batchLayoutString);
276 dnnOptions.Append (
":"); dnnOptions.Append (layoutString);
277 dnnOptions.Append (
":"); dnnOptions.Append (trainingStrategyString);
279 TString dnnMethodName =
"DNN_CPU";
281 dnnOptions +=
":Architecture=GPU";
282 dnnMethodName =
"DNN_GPU";
284 dnnOptions +=
":Architecture=CPU";
293 Info(
"TMVA_Higgs_Classification",
"Building deep neural network with keras ");
297 m.AddLine(
"import tensorflow");
298 m.AddLine(
"from tensorflow.keras.models import Sequential");
299 m.AddLine(
"from tensorflow.keras.optimizers import Adam");
300 m.AddLine(
"from tensorflow.keras.layers import Input, Dense");
302 m.AddLine(
"model = Sequential() ");
303 m.AddLine(
"model.add(Dense(64, activation='relu',input_dim=7))");
304 m.AddLine(
"model.add(Dense(64, activation='relu'))");
305 m.AddLine(
"model.add(Dense(64, activation='relu'))");
306 m.AddLine(
"model.add(Dense(64, activation='relu'))");
307 m.AddLine(
"model.add(Dense(2, activation='sigmoid'))");
308 m.AddLine(
"model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.001), weighted_metrics = ['accuracy'])");
309 m.AddLine(
"model.save('Higgs_model.keras')");
310 m.AddLine(
"model.summary()");
312 m.SaveSource(
"make_higgs_model.py");
316 gSystem->Exec(python_exe +
" make_higgs_model.py");
318 if (
gSystem->AccessPathName(
"Higgs_model.keras")) {
319 Warning(
"TMVA_Higgs_Classification",
"Error creating Keras model file - skip using Keras");
322 Info(
"TMVA_Higgs_Classification",
"Booking tf.Keras Dense model");
325 "H:!V:VarTransform=None:FilenameModel=Higgs_model.keras:tf.keras:"
326 "FilenameTrainedModel=Higgs_trained_model.keras:NumEpochs=20:BatchSize=100:"
338 factory.TrainAllMethods();
346 factory.TestAllMethods();
348 factory.EvaluateAllMethods();
352 auto c1 = factory.GetROCCurve(loader);
Double_t: typedef for double (8 bytes).
Error("WriteTObject","The current directory (%s) is not associated with a file. The object (%s) has not been written.", GetName(), objname)
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
A specialized string object used for TTree selections.
TObject * Get(const char *namecycle) override
Return pointer to object identified by namecycle.
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of background events (used to compute significance)
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
This is the main MVA steering class.
static void PyInitialize()
Initialize Python interpreter.
Class supporting a collection of lines with C++ code.
A TTree represents a columnar dataset.
void Print(Option_t *option="") const override
Print a summary of the tree contents.