77using std::stringstream;
95 fConvergerFitter( 0 ),
96 fSumOfWeightsSig( 0 ),
97 fSumOfWeightsBkg( 0 ),
99 fOutputDimensions( 0 )
113 fConvergerFitter( 0 ),
114 fSumOfWeightsSig( 0 ),
115 fSumOfWeightsBkg( 0 ),
117 fOutputDimensions( 0 )
131 fSumOfWeightsSig = 0;
132 fSumOfWeightsBkg = 0;
134 fFormulaStringP =
"";
135 fParRangeStringP =
"";
136 fFormulaStringT =
"";
137 fParRangeStringT =
"";
143 if (fMulticlassReturnVal ==
NULL) fMulticlassReturnVal =
new std::vector<Float_t>();
165 DeclareOptionRef( fFormulaStringP =
"(0)",
"Formula",
"The discrimination formula" );
166 DeclareOptionRef( fParRangeStringP =
"()",
"ParRanges",
"Parameter ranges" );
169 DeclareOptionRef( fFitMethod =
"MINUIT",
"FitMethod",
"Optimisation Method");
173 AddPreDefVal(
TString(
"MINUIT"));
175 DeclareOptionRef( fConverger =
"None",
"Converger",
"FitMethod uses Converger to improve result");
177 AddPreDefVal(
TString(
"MINUIT"));
186 fFormulaStringT = fFormulaStringP;
191 for (
UInt_t ipar=0; ipar<fNPars; ipar++) {
196 for (
Int_t ipar=fNPars; ipar<1000; ipar++) {
199 <<
"<CreateFormula> Formula contains expression: \"" <<
TString::Format(
"(%i)",ipar) <<
"\", "
200 <<
"which cannot be attributed to a parameter; "
201 <<
"it may be that the number of variable ranges given via \"ParRanges\" "
202 <<
"does not match the number of parameters in the formula expression, please verify!"
215 <<
"<CreateFormula> Formula contains expression: \"" <<
TString::Format(
"x%i",
ivar) <<
"\", "
216 <<
"which cannot be attributed to an input variable" <<
Endl;
219 Log() <<
"User-defined formula string : \"" << fFormulaStringP <<
"\"" <<
Endl;
220 Log() <<
"TFormula-compatible formula string: \"" << fFormulaStringT <<
"\"" <<
Endl;
221 Log() << kDEBUG <<
"Creating and compiling formula" <<
Endl;
224 if (fFormula)
delete fFormula;
225 fFormula =
new TFormula(
"FDA_Formula", fFormulaStringT );
228 if (!fFormula->IsValid())
229 Log() << kFATAL <<
"<ProcessOptions> Formula expression could not be properly compiled" <<
Endl;
232 if (fFormula->GetNpar() > (
Int_t)(fNPars + GetNvar()))
233 Log() << kFATAL <<
"<ProcessOptions> Dubious number of parameters in formula expression: "
234 << fFormula->GetNpar() <<
" - compared to maximum allowed: " << fNPars + GetNvar() <<
Endl;
243 fParRangeStringT = fParRangeStringP;
246 fParRangeStringT.ReplaceAll(
" ",
"" );
247 fNPars = fParRangeStringT.CountChar(
')' );
251 Log() << kFATAL <<
"<ProcessOptions> Mismatch in parameter string: "
252 <<
"the number of parameters: " << fNPars <<
" != ranges defined: "
253 <<
parList->GetSize() <<
"; the format of the \"ParRanges\" string "
254 <<
"must be: \"(-1.2,3.4);(-2.3,4.55);...\", "
255 <<
"where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; "
256 <<
"each parameter defined in the function string must have a corresponding rang."
260 fParRange.resize( fNPars );
261 for (
UInt_t ipar=0; ipar<fNPars; ipar++) fParRange[ipar] = 0;
263 for (
UInt_t ipar=0; ipar<fNPars; ipar++) {
275 if (
pmin >
pmax) Log() << kFATAL <<
"<ProcessOptions> max > min in interval for parameter: ["
276 << ipar <<
"] : [" <<
pmin <<
", " <<
pmax <<
"] " <<
Endl;
278 Log() << kINFO <<
"Create parameter interval for parameter " << ipar <<
" : [" <<
pmin <<
"," <<
pmax <<
"]" <<
Endl;
288 fOutputDimensions = 1;
290 fOutputDimensions = DataInfo().GetNTargets();
292 fOutputDimensions = DataInfo().GetNClasses();
294 for(
Int_t dim = 1; dim < fOutputDimensions; ++dim ){
295 for(
UInt_t par = 0; par < fNPars; ++par ){
296 fParRange.push_back( fParRange.at(par) );
303 if (fConverger ==
"MINUIT") {
305 SetOptions(
dynamic_cast<Configurable*
>(fConvergerFitter)->GetOptions());
308 if(fFitMethod ==
"MC")
309 fFitter =
new MCFitter( *fConvergerFitter,
TString::Format(
"%s_Fitter_MC", GetName()), fParRange, GetOptions() );
310 else if (fFitMethod ==
"GA")
312 else if (fFitMethod ==
"SA")
314 else if (fFitMethod ==
"MINUIT")
317 Log() << kFATAL <<
"<Train> Do not understand fit method:" << fFitMethod <<
Endl;
320 fFitter->CheckForUnusedOptions();
351 for (
UInt_t ipar=0; ipar<fParRange.size() && ipar<fNPars; ipar++) {
352 if (fParRange[ipar] != 0) {
delete fParRange[ipar]; fParRange[ipar] = 0; }
356 if (fFormula != 0) {
delete fFormula; fFormula = 0; }
367 fSumOfWeightsSig = 0;
368 fSumOfWeightsBkg = 0;
378 if (!DoRegression()) {
379 if (DataInfo().IsSignal(
ev)) { fSumOfWeightsSig +=
w; }
380 else { fSumOfWeightsBkg +=
w; }
386 if (!DoRegression()) {
387 if (fSumOfWeightsSig <= 0 || fSumOfWeightsBkg <= 0) {
388 Log() << kFATAL <<
"<Train> Troubles in sum of weights: "
389 << fSumOfWeightsSig <<
" (S) : " << fSumOfWeightsBkg <<
" (B)" <<
Endl;
392 else if (fSumOfWeights <= 0) {
393 Log() << kFATAL <<
"<Train> Troubles in sum of weights: "
394 << fSumOfWeights <<
Endl;
400 fBestPars.push_back( (*parIt)->GetMean() );
407 PrintResults( fFitMethod, fBestPars,
estimator );
409 delete fFitter; fFitter = 0;
410 if (fConvergerFitter!=0 && fConvergerFitter!=(
IFitterTarget*)
this) {
411 delete fConvergerFitter;
412 fConvergerFitter = 0;
424 Log() << kHEADER <<
"Results for parameter fit using \"" << fitter <<
"\" fitter:" <<
Endl;
428 Log() <<
"Discriminator expression: \"" << fFormulaStringP <<
"\"" <<
Endl;
429 Log() <<
"Value of estimator at minimum: " <<
estimator <<
Endl;
444 if( DoRegression() ){
449 for(
Int_t dim = 0; dim < fOutputDimensions; ++dim ){
451 result = InterpretFormula(
ev, pars.begin(), pars.end() );
460 }
else if( DoMulticlass() ){
465 CalculateMulticlassValues(
ev, pars, *fMulticlassReturnVal );
468 for(
Int_t dim = 0; dim < fOutputDimensions; ++dim ){
469 Double_t y = fMulticlassReturnVal->at(dim);
484 desired = (DataInfo().IsSignal(
ev) ? 1.0 : 0.0);
485 result = InterpretFormula(
ev, pars.begin(), pars.end() );
503 for( std::vector<Double_t>::iterator it =
parBegin; it !=
parEnd; ++it ){
505 fFormula->SetParameter( ipar, (*it) );
525 return InterpretFormula(
ev, fBestPars.
begin(), fBestPars.end() );
532 if (fRegressionReturnVal ==
NULL) fRegressionReturnVal =
new std::vector<Float_t>();
533 fRegressionReturnVal->clear();
539 for(
Int_t dim = 0; dim < fOutputDimensions; ++dim ){
543 const Event*
evT2 = GetTransformationHandler().InverseTransform(
evT );
544 fRegressionReturnVal->push_back(
evT2->GetTarget(0));
548 return (*fRegressionReturnVal);
555 if (fMulticlassReturnVal ==
NULL) fMulticlassReturnVal =
new std::vector<Float_t>();
556 fMulticlassReturnVal->clear();
557 std::vector<Float_t> temp;
562 CalculateMulticlassValues(
evt, fBestPars, temp );
571 (*fMulticlassReturnVal).push_back(1.0/(1.0+
norm));
574 return (*fMulticlassReturnVal);
591 for(
Int_t dim = 0; dim < fOutputDimensions; ++dim ){
595 values.push_back(
value );
609 fBestPars.resize( fNPars );
622 for (
UInt_t ipar=0; ipar<fNPars*fOutputDimensions; ipar++) {
643 fOutputDimensions = 1;
647 fBestPars.resize( fNPars*fOutputDimensions );
657 if (ipar >= fNPars*fOutputDimensions) Log() << kFATAL <<
"<ReadWeightsFromXML> index out of range: "
658 << ipar <<
" >= " << fNPars <<
Endl;
659 fBestPars[ipar] = par;
676 fout <<
" double fParameter[" << fNPars <<
"];" << std::endl;
677 fout <<
"};" << std::endl;
678 fout <<
"" << std::endl;
679 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
680 fout <<
"{" << std::endl;
681 for(
UInt_t ipar=0; ipar<fNPars; ipar++) {
682 fout <<
" fParameter[" << ipar <<
"] = " << fBestPars[ipar] <<
";" << std::endl;
684 fout <<
"}" << std::endl;
686 fout <<
"inline double " << className <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
687 fout <<
"{" << std::endl;
688 fout <<
" // interpret the formula" << std::endl;
692 for (
UInt_t ipar=0; ipar<fNPars; ipar++) {
701 fout <<
" double retval = " << str <<
";" << std::endl;
703 fout <<
" return retval; " << std::endl;
704 fout <<
"}" << std::endl;
706 fout <<
"// Clean up" << std::endl;
707 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
708 fout <<
"{" << std::endl;
709 fout <<
" // nothing to clear" << std::endl;
710 fout <<
"}" << std::endl;
724 Log() <<
"The function discriminant analysis (FDA) is a classifier suitable " <<
Endl;
725 Log() <<
"to solve linear or simple nonlinear discrimination problems." <<
Endl;
727 Log() <<
"The user provides the desired function with adjustable parameters" <<
Endl;
728 Log() <<
"via the configuration option string, and FDA fits the parameters to" <<
Endl;
729 Log() <<
"it, requiring the signal (background) function value to be as close" <<
Endl;
730 Log() <<
"as possible to 1 (0). Its advantage over the more involved and" <<
Endl;
731 Log() <<
"automatic nonlinear discriminators is the simplicity and transparency " <<
Endl;
732 Log() <<
"of the discrimination expression. A shortcoming is that FDA will" <<
Endl;
733 Log() <<
"underperform for involved problems with complicated, phase space" <<
Endl;
734 Log() <<
"dependent nonlinear correlations." <<
Endl;
736 Log() <<
"Please consult the Users Guide for the format of the formula string" <<
Endl;
737 Log() <<
"and the allowed parameter ranges:" <<
Endl;
738 if (
gConfig().WriteOptionsReference()) {
739 Log() <<
"<a href=\"https://github.com/root-project/root/blob/master/documentation/tmva/UsersGuide/TMVAUsersGuide.pdf\">"
740 <<
"TMVAUsersGuide.pdf</a>" <<
Endl;
742 else Log() <<
"documentation/tmva/UsersGuide/TMVAUsersGuide.pdf" <<
Endl;
746 Log() <<
"The FDA performance depends on the complexity and fidelity of the" <<
Endl;
747 Log() <<
"user-defined discriminator function. As a general rule, it should" <<
Endl;
748 Log() <<
"be able to reproduce the discrimination power of any linear" <<
Endl;
749 Log() <<
"discriminant analysis. To reach into the nonlinear domain, it is" <<
Endl;
750 Log() <<
"useful to inspect the correlation profiles of the input variables," <<
Endl;
751 Log() <<
"and add quadratic and higher polynomial terms between variables as" <<
Endl;
752 Log() <<
"necessary. Comparison with more involved nonlinear classifiers can" <<
Endl;
753 Log() <<
"be used as a guide." <<
Endl;
757 Log() <<
"Depending on the function used, the choice of \"FitMethod\" is" <<
Endl;
758 Log() <<
"crucial for getting valuable solutions with FDA. As a guideline it" <<
Endl;
759 Log() <<
"is recommended to start with \"FitMethod=MINUIT\". When more complex" <<
Endl;
760 Log() <<
"functions are used where MINUIT does not converge to reasonable" <<
Endl;
761 Log() <<
"results, the user should switch to non-gradient FitMethods such" <<
Endl;
762 Log() <<
"as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" <<
Endl;
763 Log() <<
"useful to combine GA (or MC) with MINUIT by setting the option" <<
Endl;
764 Log() <<
"\"Converger=MINUIT\". GA (MC) will then set the starting parameters" <<
Endl;
765 Log() <<
"for MINUIT such that the basic quality of GA (MC) of finding global" <<
Endl;
766 Log() <<
"minima is combined with the efficacy of MINUIT of finding local" <<
Endl;
767 Log() <<
"minima." <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
int Int_t
Signed integer 4 bytes (int)
float Float_t
Float 4 bytes (float)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
const_iterator begin() const
const_iterator end() const
Class that contains all the data information.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Fitter using a Genetic Algorithm.
Interface for a fitter 'target'.
The TMVA::Interval Class.
Fitter using Monte Carlo sampling of parameters.
Virtual base class for all MVA methods.
Function discriminant analysis (FDA).
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
FDA can handle classification with 2 classes and regression with one regression-target.
const std::vector< Float_t > & GetRegressionValues() override
virtual ~MethodFDA(void)
destructor
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
void Train(void) override
FDA training.
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
void ReadWeightsFromStream(std::istream &i) override
read back the training results from a file (stream)
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
void ClearAll()
delete and clear all class members
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
returns MVA value for given event
void MakeClassSpecific(std::ostream &, const TString &) const override
write FDA-specific classifier response
void AddWeightsXMLTo(void *parent) const override
create XML description for LD classification and regression (for arbitrary number of output classes/t...
void ProcessOptions() override
the option string is decoded, for available options see "DeclareOptions"
void ReadWeightsFromXML(void *wghtnode) override
read coefficients from xml weight file
Double_t EstimatorFunction(std::vector< Double_t > &) override
compute estimator for given parameter set (to be minimised)
void DeclareOptions() override
define the options (their key words) that can be set in the option string
const std::vector< Float_t > & GetMulticlassValues() override
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
void Init(void) override
default initialisation
void GetHelpMessage() const override
get help message text
Fitter using a Simulated Annealing Algorithm.
Singleton class for Global types used by TMVA.
Collectable string class.
TString & ReplaceAll(const TString &s1, const TString &s2)
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
MsgLogger & Endl(MsgLogger &ml)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Returns x raised to the power y.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.