77 using std::stringstream;
95 fConvergerFitter( 0 ),
96 fSumOfWeightsSig( 0 ),
97 fSumOfWeightsBkg( 0 ),
99 fOutputDimensions( 0 )
113 fConvergerFitter( 0 ),
114 fSumOfWeightsSig( 0 ),
115 fSumOfWeightsBkg( 0 ),
117 fOutputDimensions( 0 )
196 for (
Int_t ipar=fNPars; ipar<1000; ipar++) {
199 <<
"<CreateFormula> Formula contains expression: \"" <<
Form(
"(%i)",ipar) <<
"\", " 200 <<
"which cannot be attributed to a parameter; " 201 <<
"it may be that the number of variable ranges given via \"ParRanges\" " 202 <<
"does not match the number of parameters in the formula expression, please verify!" 215 <<
"<CreateFormula> Formula contains expression: \"" <<
Form(
"x%i",ivar) <<
"\", " 216 <<
"which cannot be attributed to an input variable" <<
Endl;
221 Log() << kDEBUG <<
"Creating and compiling formula" <<
Endl;
229 Log() << kFATAL <<
"<ProcessOptions> Formula expression could not be properly compiled" <<
Endl;
233 Log() << kFATAL <<
"<ProcessOptions> Dubious number of parameters in formula expression: " 251 Log() << kFATAL <<
"<ProcessOptions> Mismatch in parameter string: " 252 <<
"the number of parameters: " <<
fNPars <<
" != ranges defined: " 253 << parList->
GetSize() <<
"; the format of the \"ParRanges\" string " 254 <<
"must be: \"(-1.2,3.4);(-2.3,4.55);...\", " 255 <<
"where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; " 256 <<
"each parameter defined in the function string must have a corresponding rang." 270 stringstream stmin;
Float_t pmin=0; stmin << pminS.
Data(); stmin >> pmin;
271 stringstream stmax;
Float_t pmax=0; stmax << pmaxS.
Data(); stmax >> pmax;
275 if (pmin > pmax)
Log() << kFATAL <<
"<ProcessOptions> max > min in interval for parameter: [" 276 << ipar <<
"] : [" << pmin <<
", " << pmax <<
"] " <<
Endl;
278 Log() << kINFO <<
"Create parameter interval for parameter " << ipar <<
" : [" << pmin <<
"," << pmax <<
"]" <<
Endl;
317 Log() << kFATAL <<
"<Train> Do not understand fit method:" <<
fFitMethod <<
Endl;
388 Log() << kFATAL <<
"<Train> Troubles in sum of weights: " 393 Log() << kFATAL <<
"<Train> Troubles in sum of weights: " 399 for (std::vector<Interval*>::const_iterator parIt =
fParRange.begin(); parIt !=
fParRange.end(); parIt++) {
400 fBestPars.push_back( (*parIt)->GetMean() );
424 Log() << kHEADER <<
"Results for parameter fit using \"" << fitter <<
"\" fitter:" <<
Endl;
425 std::vector<TString> parNames;
426 for (
UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back(
Form(
"Par(%i)",ipar ) );
429 Log() <<
"Value of estimator at minimum: " << estimator <<
Endl;
453 estimator[2] += deviation * ev->
GetWeight();
456 estimator[2] /= sumOfWeights[2];
471 crossEntropy += t*
log(y);
475 estimator[2] /= sumOfWeights[2];
489 estimator[0] /= sumOfWeights[0];
490 estimator[1] /= sumOfWeights[1];
492 return estimator[0] + estimator[1];
503 for( std::vector<Double_t>::iterator it = parBegin; it != parEnd; ++it ){
557 std::vector<Float_t> temp;
565 for(
UInt_t iClass=0; iClass<nClasses; iClass++){
567 for(
UInt_t j=0;j<nClasses;j++){
569 norm+=
exp(temp[j]-temp[iClass]);
571 (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
596 values.push_back( value );
644 if(
gTools().HasAttr( wghtnode,
"NDim")) {
662 if (ipar >=
fNPars*fOutputDimensions)
Log() << kFATAL <<
"<ReadWeightsFromXML> index out of range: " 681 fout <<
" double fParameter[" <<
fNPars <<
"];" << std::endl;
682 fout <<
"};" << std::endl;
683 fout <<
"" << std::endl;
684 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
685 fout <<
"{" << std::endl;
687 fout <<
" fParameter[" << ipar <<
"] = " <<
fBestPars[ipar] <<
";" << std::endl;
689 fout <<
"}" << std::endl;
691 fout <<
"inline double " << className <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
692 fout <<
"{" << std::endl;
693 fout <<
" // interpret the formula" << std::endl;
706 fout <<
" double retval = " << str <<
";" << std::endl;
708 fout <<
" return retval; " << std::endl;
709 fout <<
"}" << std::endl;
711 fout <<
"// Clean up" << std::endl;
712 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
713 fout <<
"{" << std::endl;
714 fout <<
" // nothing to clear" << std::endl;
715 fout <<
"}" << std::endl;
729 Log() <<
"The function discriminant analysis (FDA) is a classifier suitable " <<
Endl;
730 Log() <<
"to solve linear or simple nonlinear discrimination problems." <<
Endl;
732 Log() <<
"The user provides the desired function with adjustable parameters" <<
Endl;
733 Log() <<
"via the configuration option string, and FDA fits the parameters to" <<
Endl;
734 Log() <<
"it, requiring the signal (background) function value to be as close" <<
Endl;
735 Log() <<
"as possible to 1 (0). Its advantage over the more involved and" <<
Endl;
736 Log() <<
"automatic nonlinear discriminators is the simplicity and transparency " <<
Endl;
737 Log() <<
"of the discrimination expression. A shortcoming is that FDA will" <<
Endl;
738 Log() <<
"underperform for involved problems with complicated, phase space" <<
Endl;
739 Log() <<
"dependent nonlinear correlations." <<
Endl;
741 Log() <<
"Please consult the Users Guide for the format of the formula string" <<
Endl;
742 Log() <<
"and the allowed parameter ranges:" <<
Endl;
743 if (
gConfig().WriteOptionsReference()) {
744 Log() <<
"<a href=\"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf\">" 745 <<
"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf</a>" <<
Endl;
747 else Log() <<
"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf" <<
Endl;
751 Log() <<
"The FDA performance depends on the complexity and fidelity of the" <<
Endl;
752 Log() <<
"user-defined discriminator function. As a general rule, it should" <<
Endl;
753 Log() <<
"be able to reproduce the discrimination power of any linear" <<
Endl;
754 Log() <<
"discriminant analysis. To reach into the nonlinear domain, it is" <<
Endl;
755 Log() <<
"useful to inspect the correlation profiles of the input variables," <<
Endl;
756 Log() <<
"and add quadratic and higher polynomial terms between variables as" <<
Endl;
757 Log() <<
"necessary. Comparison with more involved nonlinear classifiers can" <<
Endl;
758 Log() <<
"be used as a guide." <<
Endl;
762 Log() <<
"Depending on the function used, the choice of \"FitMethod\" is" <<
Endl;
763 Log() <<
"crucial for getting valuable solutions with FDA. As a guideline it" <<
Endl;
764 Log() <<
"is recommended to start with \"FitMethod=MINUIT\". When more complex" <<
Endl;
765 Log() <<
"functions are used where MINUIT does not converge to reasonable" <<
Endl;
766 Log() <<
"results, the user should switch to non-gradient FitMethods such" <<
Endl;
767 Log() <<
"as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" <<
Endl;
768 Log() <<
"useful to combine GA (or MC) with MINUIT by setting the option" <<
Endl;
769 Log() <<
"\"Converger=MINUIT\". GA (MC) will then set the starting parameters" <<
Endl;
770 Log() <<
"for MINUIT such that the basic quality of GA (MC) of finding global" <<
Endl;
771 Log() <<
"minima is combined with the efficacy of MINUIT of finding local" <<
Endl;
void Init(void)
default initialisation
static long int sum(long int i)
Double_t fSumOfWeightsBkg
void DeclareOptions()
define the options (their key words) that can be set in the option string
MsgLogger & Endl(MsgLogger &ml)
void ClearAll()
delete and clear all class members
Singleton class for Global types used by TMVA.
Collectable string class.
TString & ReplaceAll(const TString &s1, const TString &s2)
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Virtual base Class for all MVA method.
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
UInt_t GetNClasses() const
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
std::vector< Double_t > fBestPars
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Double_t Run()
estimator function interface for fitting
std::vector< Interval * > fParRange
const Event * GetEvent() const
DataSetInfo & DataInfo() const
void SetOptions(const TString &s)
Bool_t DoRegression() const
Ssiz_t First(char c) const
Find first occurrence of a character c.
Class that contains all the data information.
Double_t GetWeight() const
return the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not...
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Bool_t DoMulticlass() const
void GetHelpMessage() const
get help message text
Float_t GetTarget(UInt_t itgt) const
UInt_t GetNTargets() const
const char * GetName() const
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
The TMVA::Interval Class.
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
char * Form(const char *fmt,...)
IFitterTarget * fConvergerFitter
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
void Train(void)
FDA training.
Fitter using Monte Carlo sampling of parameters.
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
void AddWeightsXMLTo(void *parent) const
create XML description for FDA classification and regression (for arbitrary number of output classes/targets)
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
std::vector< Float_t > * fMulticlassReturnVal
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised)
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
void AddPreDefVal(const T &)
const TString & GetOptions() const
Double_t fSumOfWeightsSig
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
virtual const std::vector< Float_t > & GetRegressionValues()
Int_t CountChar(Int_t c) const
Return number of times character c occurs in the string.
Bool_t IsSignal(const Event *ev) const
virtual const std::vector< Float_t > & GetMulticlassValues()
std::vector< Float_t > * fRegressionReturnVal
Interface for a fitter 'target'.
virtual Int_t GetSize() const
double norm(double *x, double *p)
Fitter using a Genetic Algorithm.
void CheckForUnusedOptions() const
checks for unused options in option string
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
virtual ~MethodFDA(void)
destructor
Fitter using a Simulated Annealing Algorithm.
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
const char * Data() const
Function discriminant analysis (FDA).