53 #define MinNoTrainingEvents 10 72 auto roc =
GetROC(iClass, type);
73 auto inte = roc->GetROCIntegral();
107 TString hLine =
"--------------------------------------------------- :";
109 fLogger << kINFO << hLine <<
Endl;
110 fLogger << kINFO <<
"DataSet MVA :" <<
Endl;
111 fLogger << kINFO <<
"Name: Method/Title: ROC-integ :" <<
Endl;
112 fLogger << kINFO << hLine <<
Endl;
118 fLogger << kINFO << hLine <<
Endl;
145 :
TMVA::
Envelope(
"Classification", dataloader, file, options), fAnalysisType(
Types::kClassification),
189 if (meth.GetValue<
TString>(
"MethodName") == methodname && meth.GetValue<
TString>(
"MethodTitle") == methodtitle)
190 return meth.GetValue<
TString>(
"MethodOptions");
216 auto methodname = fMethods[workerID].GetValue<
TString>(
"MethodName");
217 auto methodtitle = fMethods[workerID].GetValue<
TString>(
"MethodTitle");
229 TString hLine =
"--------------------------------------------------- :";
230 Log() << kINFO << hLine <<
Endl;
231 Log() << kINFO <<
"DataSet MVA :" <<
Endl;
232 Log() << kINFO <<
"Name: Method/Title: ROC-integ :" <<
Endl;
233 Log() << kINFO << hLine <<
Endl;
236 Log() << kINFO <<
Form(
"%-20s %-15s %#1.3f :",
r.GetDataLoaderName().Data(),
237 Form(
"%s/%s",
r.GetMethodName().Data(),
r.GetMethodTitle().Data()),
r.GetROCIntegral())
240 Log() << kINFO << hLine <<
Endl;
242 Log() << kINFO <<
"-----------------------------------------------------" <<
Endl;
243 Log() << kHEADER <<
"Evaluation done." << Endl <<
Endl;
245 Log() << kINFO <<
"-----------------------------------------------------" <<
Endl;
246 Log() << kINFO <<
"Evaluation done." <<
Endl;
261 auto method =
GetMethod(methodname, methodtitle);
264 <<
Form(
"Trying to train method %s %s that maybe is not booked.", methodname.
Data(), methodtitle.
Data())
272 method->DataInfo().GetNClasses() < 2)
273 Log() << kFATAL <<
"You want to do classification training, but specified less than two classes." <<
Endl;
279 Log() << kWARNING <<
"Method " << method->GetMethodName() <<
" not trained (training tree has less entries [" 284 Log() << kHEADER <<
"Train method: " << method->GetMethodName() <<
" for Classification" << Endl <<
Endl;
285 method->TrainMethod();
286 Log() << kHEADER <<
"Training finished" << Endl <<
Endl;
299 if (!
HasMethod(methodname, methodtitle)) {
300 std::cout << methodname <<
" " << methodtitle << std::endl;
301 Log() << kERROR <<
"Trying to get method not booked." <<
Endl;
311 Log() << kFATAL <<
"No input data for the training provided!" <<
Endl;
313 Log() << kHEADER <<
"Loading booked method: " <<
gTools().
Color(
"bold") << methodname <<
" " << methodtitle
321 conf->DeclareOptionRef(boostNum = 0,
"Boost_num",
"Number of times the classifier will be boosted");
322 conf->ParseOptions();
339 Log() << kDEBUG <<
"Boost Number is " << boostNum <<
" > 0: train boosted classifier" <<
Endl;
344 Log() << kFATAL <<
"Method with type kBoost cannot be casted to MethodCategory. /Classification" <<
Endl;
363 Log() << kFATAL <<
"Method with type kCategory cannot be casted to MethodCategory. /Classification" <<
Endl;
375 Log() << kWARNING <<
"Method " << method->
GetMethodTypeName() <<
" is not capable of handling ";
404 if (methbase->GetMethodTypeName() == methodname && methbase->GetMethodName() == methodtitle) {
423 auto method =
GetMethod(methodname, methodtitle);
426 <<
Form(
"Trying to train method %s %s that maybe is not booked.", methodname.
Data(), methodtitle.
Data())
434 Log() << kHEADER <<
"Test method: " << method->GetMethodName() <<
" for Classification" 446 Int_t nmeth_used[2] = {0, 0};
448 std::vector<std::vector<TString>> mname(2);
449 std::vector<std::vector<Double_t>> sig(2),
sep(2), roc(2);
450 std::vector<std::vector<Double_t>> eff01(2), eff10(2), eff30(2), effArea(2);
451 std::vector<std::vector<Double_t>> eff01err(2), eff10err(2), eff30err(2);
452 std::vector<std::vector<Double_t>> trainEff01(2), trainEff10(2), trainEff30(2);
454 method->SetFile(
fFile.get());
459 methodNoCuts = method;
461 Log() << kHEADER <<
"Evaluate classifier: " << method->GetMethodName() << Endl <<
Endl;
462 isel = (method->GetMethodTypeName().Contains(
"Variable")) ? 1 : 0;
465 method->TestClassification();
468 mname[isel].push_back(method->GetMethodName());
469 sig[isel].push_back(method->GetSignificance());
470 sep[isel].push_back(method->GetSeparation());
471 roc[isel].push_back(method->GetROCIntegral());
474 eff01[isel].push_back(method->GetEfficiency(
"Efficiency:0.01",
Types::kTesting, err));
475 eff01err[isel].push_back(err);
476 eff10[isel].push_back(method->GetEfficiency(
"Efficiency:0.10",
Types::kTesting, err));
477 eff10err[isel].push_back(err);
478 eff30[isel].push_back(method->GetEfficiency(
"Efficiency:0.30",
Types::kTesting, err));
479 eff30err[isel].push_back(err);
480 effArea[isel].push_back(method->GetEfficiency(
"",
Types::kTesting, err));
482 trainEff01[isel].push_back(method->GetTrainingEfficiency(
"Efficiency:0.01"));
483 trainEff10[isel].push_back(method->GetTrainingEfficiency(
"Efficiency:0.10"));
484 trainEff30[isel].push_back(method->GetTrainingEfficiency(
"Efficiency:0.30"));
489 Log() << kDEBUG <<
"\tWrite evaluation histograms to file" <<
Endl;
495 for (
Int_t k = 0; k < 2; k++) {
496 std::vector<std::vector<Double_t>> vtemp;
497 vtemp.push_back(effArea[k]);
498 vtemp.push_back(eff10[k]);
499 vtemp.push_back(eff01[k]);
500 vtemp.push_back(eff30[k]);
501 vtemp.push_back(eff10err[k]);
502 vtemp.push_back(eff01err[k]);
503 vtemp.push_back(eff30err[k]);
504 vtemp.push_back(trainEff10[k]);
505 vtemp.push_back(trainEff01[k]);
506 vtemp.push_back(trainEff30[k]);
507 vtemp.push_back(sig[k]);
508 vtemp.push_back(
sep[k]);
509 vtemp.push_back(roc[k]);
510 std::vector<TString> vtemps = mname[k];
512 effArea[k] = vtemp[0];
516 eff10err[k] = vtemp[4];
517 eff01err[k] = vtemp[5];
518 eff30err[k] = vtemp[6];
519 trainEff10[k] = vtemp[7];
520 trainEff01[k] = vtemp[8];
521 trainEff30[k] = vtemp[9];
535 const Int_t nmeth = methodNoCuts == NULL ? 0 : 1;
541 std::vector<Double_t> rvec;
548 std::vector<TString> *theVars =
new std::vector<TString>;
549 std::vector<ResultsClassification *> mvaRes;
552 theVars->back().ReplaceAll(
"MVA_",
"");
553 mvaRes.push_back(dynamic_cast<ResultsClassification *>(
563 DataSet *defDs = method->fDataSetInfo.GetDataSet();
570 for (
Int_t im = 0; im < nmeth; im++) {
574 Log() << kWARNING <<
"Found NaN return value in event: " << ievt <<
" for method \"" 580 for (
Int_t iv = 0; iv < nvar; iv++)
582 if (method->fDataSetInfo.IsSignal(ev)) {
591 for (
Int_t im = 0; im < nmeth; im++) {
592 for (
Int_t jm = im; jm < nmeth; jm++) {
593 if ((dvec[im] - rvec[im]) * (dvec[jm] - rvec[jm]) > 0) {
616 if (corrMatS != 0 && corrMatB != 0) {
621 for (
Int_t im = 0; im < nmeth; im++) {
622 for (
Int_t jm = 0; jm < nmeth; jm++) {
623 mvaMatS(im, jm) = (*corrMatS)(im, jm);
624 mvaMatB(im, jm) = (*corrMatB)(im, jm);
629 std::vector<TString> theInputVars;
632 for (
Int_t iv = 0; iv < nvar; iv++) {
633 theInputVars.push_back(method->fDataSetInfo.GetVariableInfo(iv).GetLabel());
634 for (
Int_t jm = 0; jm < nmeth; jm++) {
635 varmvaMatS(iv, jm) = (*corrMatS)(nmeth + iv, jm);
636 varmvaMatB(iv, jm) = (*corrMatB)(nmeth + iv, jm);
642 Log() << kINFO <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
643 <<
"Inter-MVA correlation matrix (signal):" << Endl;
647 Log() << kINFO <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
648 <<
"Inter-MVA correlation matrix (background):" << Endl;
653 Log() << kINFO <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
654 <<
"Correlations between input variables and MVA response (signal):" << Endl;
658 Log() << kINFO <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
659 <<
"Correlations between input variables and MVA response (background):" << Endl;
663 Log() << kWARNING <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
664 <<
"<TestAllMethods> cannot compute correlation matrices" << Endl;
667 Log() << kINFO <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
668 <<
"The following \"overlap\" matrices contain the fraction of events for which " << Endl;
669 Log() << kINFO <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
670 <<
"the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" << Endl;
671 Log() << kINFO <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
672 <<
"An event is signal-like, if its MVA output exceeds the following value:" << Endl;
674 Log() << kINFO <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
675 <<
"which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;
679 Log() << kINFO <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
680 <<
"Note: no correlations and overlap with cut method are provided at present" << Endl;
684 Log() << kINFO <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
685 <<
"Inter-MVA overlap matrix (signal):" << Endl;
689 Log() << kINFO <<
Form(
"Dataset[%s] : ", method->fDataSetInfo.GetName())
690 <<
"Inter-MVA overlap matrix (background):" << Endl;
716 TString hLine =
"------------------------------------------------------------------------------------------" 717 "-------------------------";
718 Log() << kINFO <<
"Evaluation results ranked by best signal efficiency and purity (area)" <<
Endl;
719 Log() << kINFO << hLine <<
Endl;
720 Log() << kINFO <<
"DataSet MVA " <<
Endl;
721 Log() << kINFO <<
"Name: Method: ROC-integ" <<
Endl;
723 Log() << kDEBUG << hLine <<
Endl;
724 for (
Int_t k = 0; k < 2; k++) {
725 if (k == 1 && nmeth_used[k] > 0) {
726 Log() << kINFO << hLine <<
Endl;
727 Log() << kINFO <<
"Input Variables: " << Endl << hLine <<
Endl;
729 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
731 TString methodName = mname[k][i];
739 std::vector<Bool_t> *mvaResType =
dynamic_cast<ResultsClassification *
>(results)->GetValueVectorTypes();
742 if (mvaResType->size() != 0) {
746 if (
sep[k][i] < 0 || sig[k][i] < 0) {
751 Log() << kINFO <<
Form(
"%-13s %-15s: %#1.3f", datasetName.
Data(), methodName.
Data(), rocIntegral)
756 Log() << kINFO << hLine <<
Endl;
758 Log() << kINFO <<
"Testing efficiency compared to training efficiency (overtraining check)" <<
Endl;
759 Log() << kINFO << hLine <<
Endl;
761 <<
"DataSet MVA Signal efficiency: from test sample (from training sample) " 763 Log() << kINFO <<
"Name: Method: @B=0.01 @B=0.10 @B=0.30 " 765 Log() << kINFO << hLine <<
Endl;
766 for (
Int_t k = 0; k < 2; k++) {
767 if (k == 1 && nmeth_used[k] > 0) {
768 Log() << kINFO << hLine <<
Endl;
769 Log() << kINFO <<
"Input Variables: " << Endl << hLine <<
Endl;
771 for (
Int_t i = 0; i < nmeth_used[k]; i++) {
773 mname[k][i].ReplaceAll(
"Variable_",
"");
775 Log() << kINFO <<
Form(
"%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
776 method->fDataSetInfo.GetName(), (
const char *)mname[k][i], eff01[k][i],
777 trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
781 Log() << kINFO << hLine <<
Endl;
789 auto &fResult =
GetResults(methodname, methodtitle);
797 fResult.fMvaTest[0] = rocCurveTest->GetMvas();
798 TString className = method->DataInfo().GetClassInfo(0)->GetName();
799 fResult.fClassNames.push_back(className);
821 Log() << kFATAL <<
"No Classification results available" <<
Endl;
836 if (result.IsMethod(methodname, methodtitle))
840 result.
fMethod[
"MethodName"] = methodname;
841 result.
fMethod[
"MethodTitle"] = methodtitle;
843 fResults.push_back(result);
844 return fResults.back();
857 Log() << kERROR <<
Form(
"Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
867 std::vector<Bool_t> *mvaResTypes =
dynamic_cast<ResultsClassification *
>(results)->GetValueVectorTypes();
868 std::vector<Float_t> mvaResWeights;
871 mvaResWeights.reserve(eventCollection.size());
872 for (
auto ev : eventCollection) {
873 mvaResWeights.push_back(ev->GetWeight());
876 rocCurve =
new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);
879 std::vector<Float_t> mvaRes;
880 std::vector<Bool_t> mvaResTypes;
881 std::vector<Float_t> mvaResWeights;
883 std::vector<std::vector<Float_t>> *rawMvaRes =
dynamic_cast<ResultsMulticlass *
>(results)->GetValueVector();
888 mvaRes.reserve(rawMvaRes->size());
889 for (
auto item : *rawMvaRes) {
890 mvaRes.push_back(item[iClass]);
894 mvaResTypes.reserve(eventCollection.size());
895 mvaResWeights.reserve(eventCollection.size());
896 for (
auto ev : eventCollection) {
897 mvaResTypes.push_back(ev->GetClass() == iClass);
898 mvaResWeights.push_back(ev->GetWeight());
901 rocCurve =
new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
920 <<
Form(
"ROCCurve object was not created in MethodName = %s MethodTitle = %s not found with Dataset = %s ",
std::map< UInt_t, std::vector< std::tuple< Float_t, Float_t, Bool_t > > > fMvaTest
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
Access to the ClassifierFactory singleton; creates the instance if needed.
void SetModelPersistence(Bool_t status)
virtual const char * GetName() const
Returns name of object.
Bool_t IsCutsMethod(TMVA::MethodBase *method)
UInt_t GetNVariables() const
#define MinNoTrainingEvents
TString GetMethodOptions(TString methodname, TString methodtitle)
Principal Components Analysis (PCA)
void SetNWorkers(unsigned n)
Types::EAnalysisType fAnalysisType
vector of objects with booked methods
Double_t RealTime()
Stop the stopwatch (if it is running) and return the realtime (in seconds) passed between the start a...
MsgLogger & Endl(MsgLogger &ml)
DataSetInfo & GetDataLoaderDataSetInfo()
Utility method to get TMVA::DataSetInfo reference from the DataLoader.
Singleton class for Global types used by TMVA.
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
Double_t GetROCIntegral(const UInt_t points=41)
Calculates the ROC integral (AUC)
T GetValue(const TString &key)
TString & ReplaceAll(const TString &s1, const TString &s2)
static Types & Instance()
Returns the single instance of "Types" if it already exists, or creates it (Singleton).
Bool_t HasMethodObject(TString methodname, TString methodtitle, Int_t &index)
virtual void SetName(const char *name)
Set the name of the TNamed.
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Double_t GetROCIntegral(UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
virtual int MakeDirectory(const char *name)
Make a directory.
TAxis * GetYaxis() const
Get y axis of the graph.
Virtual base class for all MVA methods.
overwrite existing object with same name
MethodBase * GetMethod(TString methodname, TString methodtitle)
virtual void SetTitle(const char *title="")
Set graph title.
UInt_t GetNClasses() const
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
void SetSilentFile(Bool_t status)
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
std::shared_ptr< TFile > fFile
data
Bool_t IsSilentFile()
Method to see if a file is available to save results.
std::map< UInt_t, std::vector< std::tuple< Float_t, Float_t, Bool_t > > > fMvaTrain
TDirectory * RootBaseDir()
Utility method to get base dir directory from current file.
Bool_t HasMethod(TString methodname, TString methodtitle)
function to check methods booked
ClassificationResult & operator=(const ClassificationResult &r)
static void InhibitOutput()
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
DataSetInfo & fDataSetInfo
virtual void TrainMethod(TString methodname, TString methodtitle)
virtual void ParseOptions()
options parser
Abstract base class for all high level ml algorithms, you can book ml methods like BDT...
void SetupMethod()
setup of methods
DataSetInfo & DataInfo() const
std::vector< IMethod * > fIMethods
void SetDrawProgressBar(Bool_t d)
Classification(DataLoader *loader, TFile *file, TString options)
Class for boosting a TMVA method.
TMatrixT< Double_t > TMatrixD
Class that contains all the data information.
virtual void ParseOptions()
Method to parse the internal option string.
virtual void TestMethod(TString methodname, TString methodtitle)
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
std::string GetMethodName(TCppMethod_t)
R__EXTERN TSystem * gSystem
virtual void AddRow(const Double_t *x)
Add a data point and update the covariance matrix.
const char * GetName() const
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Double_t GetROCIntegral(TString methodname, TString methodtitle, UInt_t iClass=0)
class TMVA::Config::VariablePlotting fVariablePlotting
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
const TMatrixD * GetCovarianceMatrix() const
const TString & GetMethodName() const
virtual void MakePrincipals()
Perform the principal components analysis.
void SetBoostedMethodName(TString methodName)
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
TAxis * GetXaxis() const
Get x axis of the graph.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Class for categorizing the phase space.
TMVA::ROCCurve * GetROC(TMVA::MethodBase *method, UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
A pseudo container class which is a generator of indices.
Bool_t fModelPersistence
file to save the results
std::shared_ptr< DataLoader > fDataLoader
Booked method information.
Class which takes the results of a multiclass classification.
void SetFile(TFile *file)
void SetCurrentType(Types::ETreeType type) const
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
ostringstream derivative to redirect and format output
const TString & GetOptions() const
Mother of all ROOT objects.
UInt_t fJobs
procpool object
void SetUseColor(Bool_t uc)
void SetConfigName(const char *n)
Interface for all concrete MVA method implementations.
Bool_t IsMethod(TString methodname, TString methodtitle)
Abstract ClassifierFactory template that handles arbitrary types.
TGraph * GetROCCurve(const UInt_t points=100)
Returns a new TGraph containing the ROC curve.
DataSetManager * fDataSetManager
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
TString GetMethodTypeName() const
TGraph * GetROCGraph(UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
Class that is the base class for a vector of results.
void SetWeightFileDir(TString fileDir)
set directory of weight file
Double_t GetSignalReferenceCut() const
DataSetManager * GetDataLoaderDataSetManager()
Utility method to get TMVA::DataSetManager pointer from the DataLoader.
A Graph is a graphics object made of two arrays X and Y with npoints each.
DataInputHandler & GetDataLoaderDataInput()
Utility method to get TMVA::DataInputHandler reference from the DataLoader.
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
std::vector< ClassificationResult > & GetResults()
virtual void Evaluate()
Virtual method to be implemented with your algorithm.
static void EnableOutput()
const TString & GetTestvarName() const
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
TStopwatch fTimer
number of jobs to run some high level algorithm in parallel
std::vector< ClassificationResult > fResults
Types::EMVA GetMethodType() const
void CheckForUnusedOptions() const
checks for unused options in option string
const Event * GetEvent() const
const TString GetMethodTitle() const
std::vector< OptionMap > fMethods
TProcPool fWorkers
if true, don't produce file output
const TString GetMethodName() const
virtual void SetAnalysisType(Types::EAnalysisType type)
Class that is the base class for a vector of results.
void SetConfigDescription(const char *d)
ROCCurve * GetROC(UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
const char * Data() const