77using std::stringstream;
198 <<
"<CreateFormula> Formula contains expression: \"" <<
TString::Format(
"(%i)",ipar) <<
"\", "
199 <<
"which cannot be attributed to a parameter; "
200 <<
"it may be that the number of variable ranges given via \"ParRanges\" "
201 <<
"does not match the number of parameters in the formula expression, please verify!"
214 <<
"<CreateFormula> Formula contains expression: \"" <<
TString::Format(
"x%i",ivar) <<
"\", "
215 <<
"which cannot be attributed to an input variable" <<
Endl;
220 Log() << kDEBUG <<
"Creating and compiling formula" <<
Endl;
228 Log() << kFATAL <<
"<ProcessOptions> Formula expression could not be properly compiled" <<
Endl;
232 Log() << kFATAL <<
"<ProcessOptions> Dubious number of parameters in formula expression: "
250 Log() << kFATAL <<
"<ProcessOptions> Mismatch in parameter string: "
251 <<
"the number of parameters: " <<
fNPars <<
" != ranges defined: "
252 << parList->
GetSize() <<
"; the format of the \"ParRanges\" string "
253 <<
"must be: \"(-1.2,3.4);(-2.3,4.55);...\", "
254 <<
"where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; "
255 <<
"each parameter defined in the function string must have a corresponding rang."
265 Ssiz_t istr = str.First(
',' );
267 TString pmaxS(str(istr+1,str.Length()-2-istr));
269 stringstream stmin;
Float_t pmin=0; stmin << pminS.
Data(); stmin >> pmin;
270 stringstream stmax;
Float_t pmax=0; stmax << pmaxS.
Data(); stmax >> pmax;
273 if (
TMath::Abs(pmax-pmin) < 1.e-30) pmax = pmin;
274 if (pmin > pmax)
Log() << kFATAL <<
"<ProcessOptions> max > min in interval for parameter: ["
275 << ipar <<
"] : [" << pmin <<
", " << pmax <<
"] " <<
Endl;
277 Log() << kINFO <<
"Create parameter interval for parameter " << ipar <<
" : [" << pmin <<
"," << pmax <<
"]" <<
Endl;
316 Log() << kFATAL <<
"<Train> Do not understand fit method:" <<
fFitMethod <<
Endl;
319 fFitter->CheckForUnusedOptions();
387 Log() << kFATAL <<
"<Train> Troubles in sum of weights: "
392 Log() << kFATAL <<
"<Train> Troubles in sum of weights: "
398 for (std::vector<Interval*>::const_iterator parIt =
fParRange.begin(); parIt !=
fParRange.end(); ++parIt) {
399 fBestPars.push_back( (*parIt)->GetMean() );
423 Log() << kHEADER <<
"Results for parameter fit using \"" << fitter <<
"\" fitter:" <<
Endl;
424 std::vector<TString> parNames;
425 for (
UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back(
TString::Format(
"Par(%i)",ipar ) );
428 Log() <<
"Value of estimator at minimum: " << estimator <<
Endl;
452 estimator[2] += deviation * ev->
GetWeight();
455 estimator[2] /= sumOfWeights[2];
474 estimator[2] /= sumOfWeights[2];
483 desired = (
DataInfo().IsSignal(ev) ? 1.0 : 0.0);
488 estimator[0] /= sumOfWeights[0];
489 estimator[1] /= sumOfWeights[1];
491 return estimator[0] + estimator[1];
502 for( std::vector<Double_t>::iterator it = parBegin; it != parEnd; ++it ){
504 fFormula->SetParameter( ipar, (*it) );
556 std::vector<Float_t> temp;
564 for(
UInt_t iClass=0; iClass<nClasses; iClass++){
566 for(
UInt_t j=0;j<nClasses;j++){
568 norm+=exp(temp[j]-temp[iClass]);
570 (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
594 values.push_back( value );
638 if(
gTools().HasAttr( wghtnode,
"NDim")) {
675 fout <<
" double fParameter[" <<
fNPars <<
"];" << std::endl;
676 fout <<
"};" << std::endl;
677 fout <<
"" << std::endl;
678 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
679 fout <<
"{" << std::endl;
681 fout <<
" fParameter[" << ipar <<
"] = " <<
fBestPars[ipar] <<
";" << std::endl;
683 fout <<
"}" << std::endl;
685 fout <<
"inline double " << className <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
686 fout <<
"{" << std::endl;
687 fout <<
" // interpret the formula" << std::endl;
700 fout <<
" double retval = " << str <<
";" << std::endl;
702 fout <<
" return retval; " << std::endl;
703 fout <<
"}" << std::endl;
705 fout <<
"// Clean up" << std::endl;
706 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
707 fout <<
"{" << std::endl;
708 fout <<
" // nothing to clear" << std::endl;
709 fout <<
"}" << std::endl;
723 Log() <<
"The function discriminant analysis (FDA) is a classifier suitable " <<
Endl;
724 Log() <<
"to solve linear or simple nonlinear discrimination problems." <<
Endl;
726 Log() <<
"The user provides the desired function with adjustable parameters" <<
Endl;
727 Log() <<
"via the configuration option string, and FDA fits the parameters to" <<
Endl;
728 Log() <<
"it, requiring the signal (background) function value to be as close" <<
Endl;
729 Log() <<
"as possible to 1 (0). Its advantage over the more involved and" <<
Endl;
730 Log() <<
"automatic nonlinear discriminators is the simplicity and transparency " <<
Endl;
731 Log() <<
"of the discrimination expression. A shortcoming is that FDA will" <<
Endl;
732 Log() <<
"underperform for involved problems with complicated, phase space" <<
Endl;
733 Log() <<
"dependent nonlinear correlations." <<
Endl;
735 Log() <<
"Please consult the Users Guide for the format of the formula string" <<
Endl;
736 Log() <<
"and the allowed parameter ranges:" <<
Endl;
737 if (
gConfig().WriteOptionsReference()) {
738 Log() <<
"<a href=\"https://github.com/root-project/root/blob/master/documentation/tmva/UsersGuide/TMVAUsersGuide.pdf\">"
739 <<
"TMVAUsersGuide.pdf</a>" <<
Endl;
741 else Log() <<
"documentation/tmva/UsersGuide/TMVAUsersGuide.pdf" <<
Endl;
745 Log() <<
"The FDA performance depends on the complexity and fidelity of the" <<
Endl;
746 Log() <<
"user-defined discriminator function. As a general rule, it should" <<
Endl;
747 Log() <<
"be able to reproduce the discrimination power of any linear" <<
Endl;
748 Log() <<
"discriminant analysis. To reach into the nonlinear domain, it is" <<
Endl;
749 Log() <<
"useful to inspect the correlation profiles of the input variables," <<
Endl;
750 Log() <<
"and add quadratic and higher polynomial terms between variables as" <<
Endl;
751 Log() <<
"necessary. Comparison with more involved nonlinear classifiers can" <<
Endl;
752 Log() <<
"be used as a guide." <<
Endl;
756 Log() <<
"Depending on the function used, the choice of \"FitMethod\" is" <<
Endl;
757 Log() <<
"crucial for getting valuable solutions with FDA. As a guideline it" <<
Endl;
758 Log() <<
"is recommended to start with \"FitMethod=MINUIT\". When more complex" <<
Endl;
759 Log() <<
"functions are used where MINUIT does not converge to reasonable" <<
Endl;
760 Log() <<
"results, the user should switch to non-gradient FitMethods such" <<
Endl;
761 Log() <<
"as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" <<
Endl;
762 Log() <<
"useful to combine GA (or MC) with MINUIT by setting the option" <<
Endl;
763 Log() <<
"\"Converger=MINUIT\". GA (MC) will then set the starting parameters" <<
Endl;
764 Log() <<
"for MINUIT such that the basic quality of GA (MC) of finding global" <<
Endl;
765 Log() <<
"minima is combined with the efficacy of MINUIT of finding local" <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
int Int_t
Signed integer 4 bytes (int).
int Ssiz_t
String size (currently int).
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
bool Bool_t
Boolean (0=false, 1=true) (bool).
double Double_t
Double 8 bytes.
float Float_t
Float 4 bytes (float).
virtual Int_t GetSize() const
Return the capacity of the collection, i.e. the current total amount of space that has been allocated so far.
TObject * At(Int_t idx) const override
Returns the object at position idx. Returns 0 if idx is out of range.
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void AddPreDefVal(const T &)
const TString & GetOptions() const
void SetOptions(const TString &s)
Class that contains all the data information.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Float_t GetTarget(UInt_t itgt) const
Fitter using a Genetic Algorithm.
IFitterTarget()
constructor
The TMVA::Interval Class.
Fitter using Monte Carlo sampling of parameters.
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
const char * GetName() const override
Bool_t DoMulticlass() const
UInt_t GetNEvents() const
Bool_t DoRegression() const
std::vector< Float_t > * fRegressionReturnVal
std::vector< Float_t > * fMulticlassReturnVal
const Event * GetEvent() const
DataSetInfo & DataInfo() const
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
FDA can handle classification with 2 classes and regression with one regression-target.
TString fFormulaStringT
string with function
const std::vector< Float_t > & GetRegressionValues() override
virtual ~MethodFDA(void)
destructor
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
void Train(void) override
FDA training.
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
void ReadWeightsFromStream(std::istream &i) override
read back the training results from a file (stream)
Double_t fSumOfWeightsBkg
sum of weights (background)
Int_t fOutputDimensions
number of output values
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
void ClearAll()
delete and clear all class members
std::vector< Interval * > fParRange
ranges of parameters
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
returns MVA value for given event
Double_t fSumOfWeightsSig
sum of weights (signal)
TString fParRangeStringP
string with ranges of parameters
void MakeClassSpecific(std::ostream &, const TString &) const override
write FDA-specific classifier response
TFormula * fFormula
the discrimination function
std::vector< Double_t > fBestPars
the pars that optimise (minimise) the estimator
void AddWeightsXMLTo(void *parent) const override
create XML description for LD classification and regression (for arbitrary number of output classes/targets)
IFitterTarget * fConvergerFitter
intermediate fitter
void ProcessOptions() override
the option string is decoded, for available options see "DeclareOptions"
FitterBase * fFitter
the fitter used in the training
void ReadWeightsFromXML(void *wghtnode) override
read coefficients from xml weight file
Double_t EstimatorFunction(std::vector< Double_t > &) override
compute estimator for given parameter set (to be minimised)
Double_t fSumOfWeights
sum of weights
TString fParRangeStringT
string with ranges of parameters
void DeclareOptions() override
define the options (their key words) that can be set in the option string
const std::vector< Float_t > & GetMulticlassValues() override
TString fFitMethod
estimator optimisation method
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
UInt_t fNPars
number of parameters
TString fConverger
fit method uses fConverger as intermediate step to converge into local minima
TString fFormulaStringP
string with function
void Init(void) override
default initialisation
void GetHelpMessage() const override
get help message text
Fitter using a Simulated Annealing Algorithm.
Singleton class for Global types used by TMVA.
Collectable string class.
const char * Data() const
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and returns a TString.
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Returns x raised to the power y.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.