******************************************************************************
*Tree :sig_tree : tree *
*Entries : 10000 : Total = 1177229 bytes File Size = 785298 *
* : : Tree compression factor = 1.48 *
******************************************************************************
*Br 0 :Type : Type/F *
*Entries : 10000 : Total Size= 40556 bytes File Size = 307 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 130.54 *
*............................................................................*
*Br 1 :lepton_pT : lepton_pT/F *
*Entries : 10000 : Total Size= 40581 bytes File Size = 30464 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.32 *
*............................................................................*
*Br 2 :lepton_eta : lepton_eta/F *
*Entries : 10000 : Total Size= 40586 bytes File Size = 28650 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.40 *
*............................................................................*
*Br 3 :lepton_phi : lepton_phi/F *
*Entries : 10000 : Total Size= 40586 bytes File Size = 30508 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.31 *
*............................................................................*
*Br 4 :missing_energy_magnitude : missing_energy_magnitude/F *
*Entries : 10000 : Total Size= 40656 bytes File Size = 35749 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.12 *
*............................................................................*
*Br 5 :missing_energy_phi : missing_energy_phi/F *
*Entries : 10000 : Total Size= 40626 bytes File Size = 36766 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.09 *
*............................................................................*
*Br 6 :jet1_pt : jet1_pt/F *
*Entries : 10000 : Total Size= 40571 bytes File Size = 32298 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.24 *
*............................................................................*
*Br 7 :jet1_eta : jet1_eta/F *
*Entries : 10000 : Total Size= 40576 bytes File Size = 28467 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.41 *
*............................................................................*
*Br 8 :jet1_phi : jet1_phi/F *
*Entries : 10000 : Total Size= 40576 bytes File Size = 30399 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.32 *
*............................................................................*
*Br 9 :jet1_b-tag : jet1_b-tag/F *
*Entries : 10000 : Total Size= 40586 bytes File Size = 5087 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 7.88 *
*............................................................................*
*Br 10 :jet2_pt : jet2_pt/F *
*Entries : 10000 : Total Size= 40571 bytes File Size = 31561 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.27 *
*............................................................................*
*Br 11 :jet2_eta : jet2_eta/F *
*Entries : 10000 : Total Size= 40576 bytes File Size = 28616 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.40 *
*............................................................................*
*Br 12 :jet2_phi : jet2_phi/F *
*Entries : 10000 : Total Size= 40576 bytes File Size = 30547 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.31 *
*............................................................................*
*Br 13 :jet2_b-tag : jet2_b-tag/F *
*Entries : 10000 : Total Size= 40586 bytes File Size = 5031 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 7.97 *
*............................................................................*
*Br 14 :jet3_pt : jet3_pt/F *
*Entries : 10000 : Total Size= 40571 bytes File Size = 30642 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.31 *
*............................................................................*
*Br 15 :jet3_eta : jet3_eta/F *
*Entries : 10000 : Total Size= 40576 bytes File Size = 28955 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.38 *
*............................................................................*
*Br 16 :jet3_phi : jet3_phi/F *
*Entries : 10000 : Total Size= 40576 bytes File Size = 30433 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.32 *
*............................................................................*
*Br 17 :jet3_b-tag : jet3_b-tag/F *
*Entries : 10000 : Total Size= 40586 bytes File Size = 4879 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 8.22 *
*............................................................................*
*Br 18 :jet4_pt : jet4_pt/F *
*Entries : 10000 : Total Size= 40571 bytes File Size = 29189 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.37 *
*............................................................................*
*Br 19 :jet4_eta : jet4_eta/F *
*Entries : 10000 : Total Size= 40576 bytes File Size = 29311 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.37 *
*............................................................................*
*Br 20 :jet4_phi : jet4_phi/F *
*Entries : 10000 : Total Size= 40576 bytes File Size = 30525 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.31 *
*............................................................................*
*Br 21 :jet4_b-tag : jet4_b-tag/F *
*Entries : 10000 : Total Size= 40586 bytes File Size = 4725 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 8.48 *
*............................................................................*
*Br 22 :m_jj : m_jj/F *
*Entries : 10000 : Total Size= 40556 bytes File Size = 34991 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.15 *
*............................................................................*
*Br 23 :m_jjj : m_jjj/F *
*Entries : 10000 : Total Size= 40561 bytes File Size = 34460 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.16 *
*............................................................................*
*Br 24 :m_lv : m_lv/F *
*Entries : 10000 : Total Size= 40556 bytes File Size = 32232 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.24 *
*............................................................................*
*Br 25 :m_jlv : m_jlv/F *
*Entries : 10000 : Total Size= 40561 bytes File Size = 34598 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.16 *
*............................................................................*
*Br 26 :m_bb : m_bb/F *
*Entries : 10000 : Total Size= 40556 bytes File Size = 35012 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.14 *
*............................................................................*
*Br 27 :m_wbb : m_wbb/F *
*Entries : 10000 : Total Size= 40561 bytes File Size = 34493 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.16 *
*............................................................................*
*Br 28 :m_wwbb : m_wwbb/F *
*Entries : 10000 : Total Size= 40566 bytes File Size = 34410 *
*Baskets : 1 : Basket Size= 1500672 bytes Compression= 1.16 *
*............................................................................*
DataSetInfo : [dataset] : Added class "Signal"
: Add Tree sig_tree of type Signal with 10000 events
DataSetInfo : [dataset] : Added class "Background"
: Add Tree bkg_tree of type Background with 10000 events
Factory : Booking method: ␛[1mLikelihood␛[0m
:
Factory : Booking method: ␛[1mFisher␛[0m
:
Factory : Booking method: ␛[1mBDT␛[0m
:
: Rebuilding Dataset dataset
: Building event vectors for type 2 Signal
: Dataset[dataset] : create input formulas for tree sig_tree
: Building event vectors for type 2 Background
: Dataset[dataset] : create input formulas for tree bkg_tree
DataSetFactory : [dataset] : Number of events in input trees
:
:
: Number of training and testing events
: ---------------------------------------------------------------------------
: Signal -- training events : 7000
: Signal -- testing events : 3000
: Signal -- training and testing events: 10000
: Background -- training events : 7000
: Background -- testing events : 3000
: Background -- training and testing events: 10000
:
DataSetInfo : Correlation matrix (Signal):
: ----------------------------------------------------------------
: m_jj m_jjj m_lv m_jlv m_bb m_wbb m_wwbb
: m_jj: +1.000 +0.777 +0.010 +0.107 +0.036 +0.517 +0.532
: m_jjj: +0.777 +1.000 +0.006 +0.083 +0.157 +0.682 +0.669
: m_lv: +0.010 +0.006 +1.000 +0.111 -0.026 +0.011 +0.023
: m_jlv: +0.107 +0.083 +0.111 +1.000 +0.325 +0.550 +0.555
: m_bb: +0.036 +0.157 -0.026 +0.325 +1.000 +0.463 +0.347
: m_wbb: +0.517 +0.682 +0.011 +0.550 +0.463 +1.000 +0.912
: m_wwbb: +0.532 +0.669 +0.023 +0.555 +0.347 +0.912 +1.000
: ----------------------------------------------------------------
DataSetInfo : Correlation matrix (Background):
: ----------------------------------------------------------------
: m_jj m_jjj m_lv m_jlv m_bb m_wbb m_wwbb
: m_jj: +1.000 +0.804 +0.017 +0.125 +0.007 +0.381 +0.394
: m_jjj: +0.804 +1.000 +0.025 +0.159 +0.153 +0.535 +0.520
: m_lv: +0.017 +0.025 +1.000 +0.114 +0.042 +0.064 +0.069
: m_jlv: +0.125 +0.159 +0.114 +1.000 +0.286 +0.592 +0.542
: m_bb: +0.007 +0.153 +0.042 +0.286 +1.000 +0.623 +0.441
: m_wbb: +0.381 +0.535 +0.064 +0.592 +0.623 +1.000 +0.878
: m_wwbb: +0.394 +0.520 +0.069 +0.542 +0.441 +0.878 +1.000
: ----------------------------------------------------------------
DataSetFactory : [dataset] :
:
Factory : Booking method: ␛[1mDNN_GPU␛[0m
:
: Parsing option string:
: ... "!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIER:InputLayout=1|1|7:BatchLayout=1|128|7:Layout=DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|1|LINEAR:TrainingStrategy=LearningRate=1e-3,Momentum=0.9,ConvergenceSteps=10,BatchSize=128,TestRepetitions=1,MaxEpochs=20,WeightDecay=1e-4,Regularization=None,Optimizer=ADAM,ADAM_beta1=0.9,ADAM_beta2=0.999,ADAM_eps=1.E-7,DropConfig=0.0+0.0+0.0+0.:Architecture=GPU"
: The following options are set:
: - By User:
: <none>
: - Default:
: Boost_num: "0" [Number of times the classifier will be boosted]
: Parsing option string:
: ... "!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIER:InputLayout=1|1|7:BatchLayout=1|128|7:Layout=DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|1|LINEAR:TrainingStrategy=LearningRate=1e-3,Momentum=0.9,ConvergenceSteps=10,BatchSize=128,TestRepetitions=1,MaxEpochs=20,WeightDecay=1e-4,Regularization=None,Optimizer=ADAM,ADAM_beta1=0.9,ADAM_beta2=0.999,ADAM_eps=1.E-7,DropConfig=0.0+0.0+0.0+0.:Architecture=GPU"
: The following options are set:
: - By User:
: V: "True" [Verbose output (short form of "VerbosityLevel" below - overrides the latter one)]
: VarTransform: "G" [List of variable transformations performed before training, e.g., "D_Background,P_Signal,G,N_AllClasses" for: "Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)"]
: H: "False" [Print method-specific help message]
: InputLayout: "1|1|7" [The Layout of the input]
: BatchLayout: "1|128|7" [The Layout of the batch]
: Layout: "DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|1|LINEAR" [Layout of the network.]
: ErrorStrategy: "CROSSENTROPY" [Loss function: Mean squared error (regression) or cross entropy (binary classification).]
: WeightInitialization: "XAVIER" [Weight initialization strategy]
: Architecture: "GPU" [Which architecture to perform the training on.]
: TrainingStrategy: "LearningRate=1e-3,Momentum=0.9,ConvergenceSteps=10,BatchSize=128,TestRepetitions=1,MaxEpochs=20,WeightDecay=1e-4,Regularization=None,Optimizer=ADAM,ADAM_beta1=0.9,ADAM_beta2=0.999,ADAM_eps=1.E-7,DropConfig=0.0+0.0+0.0+0." [Defines the training strategies.]
: - Default:
: VerbosityLevel: "Default" [Verbosity level]
: CreateMVAPdfs: "False" [Create PDFs for classifier outputs (signal and background)]
: IgnoreNegWeightsInTraining: "False" [Events with negative weights are ignored in the training (but are included for testing and performance evaluation)]
: RandomSeed: "0" [Random seed used for weight initialization and batch shuffling]
: ValidationSize: "20%" [Part of the training data to use for validation. Specify as 0.2 or 20% to use a fifth of the data set as validation set. Specify as 100 to use exactly 100 events. (Default: 20%)]
DNN_GPU : [dataset] : Create Transformation "G" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'm_jj' <---> Output : variable 'm_jj'
: Input : variable 'm_jjj' <---> Output : variable 'm_jjj'
: Input : variable 'm_lv' <---> Output : variable 'm_lv'
: Input : variable 'm_jlv' <---> Output : variable 'm_jlv'
: Input : variable 'm_bb' <---> Output : variable 'm_bb'
: Input : variable 'm_wbb' <---> Output : variable 'm_wbb'
: Input : variable 'm_wwbb' <---> Output : variable 'm_wwbb'
: Will now use the GPU architecture !
import os
import ROOT
# Short aliases for the ROOT namespace / class used throughout the script.
TMVA = ROOT.TMVA
TFile = ROOT.TFile

# Switches selecting which classifiers are booked further down; each flag
# guards exactly one `factory.BookMethod(...)` call.
useLikelihood = True
useLikelihoodKDE = False
useFischer = True
useMLP = False
useBDT = True
useDL = True
useKeras = True

# The Keras method is booked through PyMVA, so it is only usable when
# `root-config` reports that ROOT was built with PyMVA support.
if ROOT.gSystem.GetFromPipe("root-config --has-tmva-pymva") == "yes":
    # Initialise the Python interpreter used by the PyMVA methods.
    TMVA.PyMethodBase.PyInitialize()
else:
    useKeras = False

if useKeras:
    # Availability probe only: the Keras model is built from
    # `tensorflow.keras` later, so skip that method when tensorflow cannot
    # be imported instead of aborting the whole script.
    try:
        import tensorflow  # noqa: F401
    except Exception:
        # Deliberately broad (a broken tensorflow install can fail with more
        # than ImportError), but no longer swallows KeyboardInterrupt or
        # SystemExit the way a bare `except:` does.
        ROOT.Warning("TMVA_Higgs_Classification", "Skip using Keras since tensorflow is not available")
        useKeras = False
# ROOT file that receives the factory's output; "RECREATE" is the open
# option passed to TFile.Open.
outputFile = TFile.Open("Higgs_ClassificationOutput.root", "RECREATE")

# The factory steers booking, training, testing and evaluation of all methods.
factory = TMVA.Factory(
    "TMVA_Higgs_Classification",
    outputFile,
    V=False,
    ROC=True,
    Silent=False,
    Color=True,
    AnalysisType="Classification",
)

# Input data set shipped with the ROOT tutorials.
inputFileName = str(ROOT.gROOT.GetTutorialDir()) + "/machine_learning/data/Higgs_data.root"
inputFile = TFile.Open(inputFileName)
# A failed open does not come back as Python `None`, so test the object's
# truthiness and zombie state instead of `is None`.
# NOTE(review): recent ROOT releases may raise OSError from TFile.Open
# directly, in which case this check is only a safety net - confirm against
# the ROOT version in use.
if not inputFile or inputFile.IsZombie():
    raise FileNotFoundError("Input file is not found - exit")

# Signal and background events live in two separate trees of the same file.
signalTree = inputFile.Get("sig_tree")
backgroundTree = inputFile.Get("bkg_tree")
signalTree.Print()

# The data loader collects the input variables and trees under the name "dataset".
loader = TMVA.DataLoader("dataset")

# Input variables handed to every booked method (seven in total).
for variableName in ("m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb"):
    loader.AddVariable(variableName)

# Global per-tree event weights.
signalWeight = 1.0
backgroundWeight = 1.0
loader.AddSignalTree(signalTree, signalWeight)
loader.AddBackgroundTree(backgroundTree, backgroundWeight)

# Empty cuts: no extra selection is applied to signal or background events.
mycuts = ROOT.TCut("")
mycutb = ROOT.TCut("")

# Request 7000 training events per class, picked at random.
loader.PrepareTrainingAndTestTree(
    mycuts, mycutb, nTrain_Signal=7000, nTrain_Background=7000, SplitMode="Random", NormMode="NumEvents", V=False
)
# Book the "Likelihood" method (TMVA.Types.kLikelihood) when enabled.
if useLikelihood:
    factory.BookMethod(
        loader,
        TMVA.Types.kLikelihood,
        "Likelihood",
        H=True,
        V=False,
        TransformOutput=True,
        # Passed through verbatim as one option value, including the
        # per-variable NSmooth* overrides it contains.
        PDFInterpol="Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10",
        NSmooth=1,
        NAvEvtPerBin=50,
    )
# Book the "LikelihoodKDE" method: same method type as "Likelihood"
# (TMVA.Types.kLikelihood) but configured with PDFInterpol="KDE".
if useLikelihoodKDE:
    factory.BookMethod(
        loader,
        TMVA.Types.kLikelihood,
        "LikelihoodKDE",
        H=False,
        V=False,
        TransformOutput=False,
        PDFInterpol="KDE",
        KDEtype="Gauss",
        KDEiter="Adaptive",
        KDEFineFactor=0.3,
        # NOTE(review): Python None here is presumably rendered as the option
        # text "KDEborder=None" - confirm against the TMVA pythonization.
        KDEborder=None,
        NAvEvtPerBin=50,
    )
# Book the "Fisher" method (TMVA.Types.kFisher) when enabled.
if useFischer:
    factory.BookMethod(
        loader,
        TMVA.Types.kFisher,
        "Fisher",
        H=True,
        V=False,
        Fisher=True,
        # NOTE(review): Python None here is presumably rendered as the option
        # text "VarTransform=None" - confirm against the TMVA pythonization.
        VarTransform=None,
        CreateMVAPdfs=True,
        PDFInterpolMVAPdf="Spline2",
        NbinsMVAPdf=50,
        NsmoothMVAPdf=10,
    )
# Book the "BDT" method (TMVA.Types.kBDT) when enabled: 200 trees of
# depth 2, AdaBoost boosting with bagging on half of the sample.
if useBDT:
    factory.BookMethod(
        loader,
        TMVA.Types.kBDT,
        "BDT",
        V=False,
        NTrees=200,
        MinNodeSize="2.5%",
        MaxDepth=2,
        BoostType="AdaBoost",
        AdaBoostBeta=0.5,
        UseBaggedBoost=True,
        BaggedSampleFraction=0.5,
        SeparationType="GiniIndex",
        nCuts=20,
    )
# Book the "MLP" method (TMVA.Types.kMLP) when enabled.
if useMLP:
    factory.BookMethod(
        loader,
        TMVA.Types.kMLP,
        "MLP",
        H=False,
        V=False,
        NeuronType="tanh",
        VarTransform="N",
        NCycles=100,
        HiddenLayers="N+5",
        TestRate=5,
        UseRegulator=False,
    )
# Book the TMVA deep-learning method (TMVA.Types.kDL) when enabled, on the
# GPU if `root-config` reports GPU support and on the CPU otherwise.
if useDL:
    useDLGPU = ROOT.gSystem.GetFromPipe("root-config --has-tmva-gpu") == "yes"

    # Training-phase settings, passed as the single "TrainingStrategy" option.
    training1 = ROOT.TString(
        "LearningRate=1e-3,Momentum=0.9,"
        "ConvergenceSteps=10,BatchSize=128,TestRepetitions=1,"
        "MaxEpochs=20,WeightDecay=1e-4,Regularization=None,"
        "Optimizer=ADAM,ADAM_beta1=0.9,ADAM_beta2=0.999,ADAM_eps=1.E-7,"
        "DropConfig=0.0+0.0+0.0+0."
    )

    # Architecture and method title are chosen together; both are plain
    # Python strings on either path.
    if useDLGPU:
        arch = "GPU"
        dnnMethodName = "DNN_GPU"
    else:
        arch = "CPU"
        dnnMethodName = "DNN_CPU"

    factory.BookMethod(
        loader,
        TMVA.Types.kDL,
        dnnMethodName,
        H=False,
        V=True,
        ErrorStrategy="CROSSENTROPY",
        VarTransform="G",
        WeightInitialization="XAVIER",
        # The trailing 7 equals the number of variables added to the loader;
        # 128 equals BatchSize in the training strategy above.
        InputLayout="1|1|7",
        BatchLayout="1|128|7",
        Layout="DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|64|TANH,DENSE|1|LINEAR",
        TrainingStrategy=training1,
        Architecture=arch,
    )
# Build a Keras model, save it to disk and book it through PyMVA
# (TMVA.Types.kPyKeras) when enabled.
if useKeras:
    ROOT.Info("TMVA_Higgs_Classification", "Building Deep Learning keras model")

    # Imported here so the script still runs without tensorflow when the
    # Keras method is disabled.
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.optimizers import Adam

    # Four hidden layers of 64 ReLU units on the 7 input variables,
    # followed by a 2-unit sigmoid output layer.
    model = Sequential()
    model.add(Dense(64, activation="relu", input_dim=7))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(2, activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"])
    model.save("model_higgs.keras")
    model.summary()

    # The method is booked with the saved file name (FilenameModel), so
    # fail loudly if the save did not produce it.
    if not os.path.exists("model_higgs.keras"):
        raise FileNotFoundError("Error creating Keras model file - skip using Keras")

    ROOT.Info("TMVA_Higgs_Classification", "Booking Deep Learning keras model")
    factory.BookMethod(
        loader,
        TMVA.Types.kPyKeras,
        "PyKeras",
        H=True,
        V=False,
        # NOTE(review): Python None here is presumably rendered as the option
        # text "VarTransform=None" - confirm against the TMVA pythonization.
        VarTransform=None,
        FilenameModel="model_higgs.keras",
        FilenameTrainedModel="trained_model_higgs.keras",
        NumEpochs=20,
        BatchSize=100,
    )
# Run the three factory stages in their fixed order: train every booked
# method, apply each to the test sample, then evaluate them.
for runStage in (factory.TrainAllMethods, factory.TestAllMethods, factory.EvaluateAllMethods):
    runStage()

# Draw the ROC curves for the methods booked on this loader. The result
# stays bound to `c1` so the drawn object keeps a live reference.
c1 = factory.GetROCCurve(loader)
c1.Draw()

# Close the output file opened at the start of the script.
outputFile.Close()
Referenced API:
static TFile * TFile::Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
    Create / open a file.
class TMVA::Factory
    This is the main MVA steering class.
static void TMVA::PyMethodBase::PyInitialize()
    Initialize Python interpreter.