74 using std::stringstream;
92 fConvergerFitter( 0 ),
93 fSumOfWeightsSig( 0 ),
94 fSumOfWeightsBkg( 0 ),
96 fOutputDimensions( 0 )
110 fConvergerFitter( 0 ),
111 fSumOfWeightsSig( 0 ),
112 fSumOfWeightsBkg( 0 ),
114 fOutputDimensions( 0 )
190 for (
Int_t ipar=fNPars; ipar<1000; ipar++) {
193 <<
"<CreateFormula> Formula contains expression: \"" <<
Form(
"(%i)",ipar) <<
"\", " 194 <<
"which cannot be attributed to a parameter; " 195 <<
"it may be that the number of variable ranges given via \"ParRanges\" " 196 <<
"does not match the number of parameters in the formula expression, please verify!" 209 <<
"<CreateFormula> Formula contains expression: \"" <<
Form(
"x%i",ivar) <<
"\", " 210 <<
"which cannot be attributed to an input variable" <<
Endl;
223 Log() <<
kFATAL <<
"<ProcessOptions> Formula expression could not be properly compiled" <<
Endl;
227 Log() <<
kFATAL <<
"<ProcessOptions> Dubious number of parameters in formula expression: " 245 Log() <<
kFATAL <<
"<ProcessOptions> Mismatch in parameter string: " 246 <<
"the number of parameters: " <<
fNPars <<
" != ranges defined: " 247 << parList->
GetSize() <<
"; the format of the \"ParRanges\" string " 248 <<
"must be: \"(-1.2,3.4);(-2.3,4.55);...\", " 249 <<
"where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; " 250 <<
"each parameter defined in the function string must have a corresponding rang." 264 stringstream stmin;
Float_t pmin=0; stmin << pminS.
Data(); stmin >> pmin;
265 stringstream stmax;
Float_t pmax=0; stmax << pmaxS.
Data(); stmax >> pmax;
269 if (pmin > pmax)
Log() <<
kFATAL <<
"<ProcessOptions> max > min in interval for parameter: [" 270 << ipar <<
"] : [" << pmin <<
", " << pmax <<
"] " <<
Endl;
272 Log() <<
kINFO <<
"Create parameter interval for parameter " << ipar <<
" : [" << pmin <<
"," << pmax <<
"]" <<
Endl;
382 Log() <<
kFATAL <<
"<Train> Troubles in sum of weights: " 387 Log() <<
kFATAL <<
"<Train> Troubles in sum of weights: " 393 for (std::vector<Interval*>::const_iterator parIt =
fParRange.begin(); parIt !=
fParRange.end(); parIt++) {
394 fBestPars.push_back( (*parIt)->GetMean() );
418 Log() <<
kHEADER <<
"Results for parameter fit using \"" << fitter <<
"\" fitter:" <<
Endl;
419 std::vector<TString> parNames;
420 for (
UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back(
Form(
"Par(%i)",ipar ) );
423 Log() <<
"Value of estimator at minimum: " << estimator <<
Endl;
449 estimator[2] += deviation * ev->
GetWeight();
452 estimator[2] /= sumOfWeights[2];
467 crossEntropy += t*
log(y);
471 estimator[2] /= sumOfWeights[2];
485 estimator[0] /= sumOfWeights[0];
486 estimator[1] /= sumOfWeights[1];
488 return estimator[0] + estimator[1];
499 for( std::vector<Double_t>::iterator it = parBegin; it != parEnd; ++it ){
554 std::vector<Float_t> temp;
562 for(
UInt_t iClass=0; iClass<nClasses; iClass++){
564 for(
UInt_t j=0;j<nClasses;j++){
566 norm+=
exp(temp[j]-temp[iClass]);
568 (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
593 values.push_back( value );
643 if(
gTools().HasAttr( wghtnode,
"NDim")) {
661 if (ipar >=
fNPars*fOutputDimensions)
Log() <<
kFATAL <<
"<ReadWeightsFromXML> index out of range: " 680 fout <<
" double fParameter[" <<
fNPars <<
"];" << std::endl;
681 fout <<
"};" << std::endl;
682 fout <<
"" << std::endl;
683 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
684 fout <<
"{" << std::endl;
686 fout <<
" fParameter[" << ipar <<
"] = " <<
fBestPars[ipar] <<
";" << std::endl;
688 fout <<
"}" << std::endl;
690 fout <<
"inline double " << className <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
691 fout <<
"{" << std::endl;
692 fout <<
" // interpret the formula" << std::endl;
705 fout <<
" double retval = " << str <<
";" << std::endl;
707 fout <<
" return retval; " << std::endl;
708 fout <<
"}" << std::endl;
710 fout <<
"// Clean up" << std::endl;
711 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
712 fout <<
"{" << std::endl;
713 fout <<
" // nothing to clear" << std::endl;
714 fout <<
"}" << std::endl;
728 Log() <<
"The function discriminant analysis (FDA) is a classifier suitable " <<
Endl;
729 Log() <<
"to solve linear or simple nonlinear discrimination problems." <<
Endl;
731 Log() <<
"The user provides the desired function with adjustable parameters" <<
Endl;
732 Log() <<
"via the configuration option string, and FDA fits the parameters to" <<
Endl;
733 Log() <<
"it, requiring the signal (background) function value to be as close" <<
Endl;
734 Log() <<
"as possible to 1 (0). Its advantage over the more involved and" <<
Endl;
735 Log() <<
"automatic nonlinear discriminators is the simplicity and transparency " <<
Endl;
736 Log() <<
"of the discrimination expression. A shortcoming is that FDA will" <<
Endl;
737 Log() <<
"underperform for involved problems with complicated, phase space" <<
Endl;
738 Log() <<
"dependent nonlinear correlations." <<
Endl;
740 Log() <<
"Please consult the Users Guide for the format of the formula string" <<
Endl;
741 Log() <<
"and the allowed parameter ranges:" <<
Endl;
742 if (
gConfig().WriteOptionsReference()) {
743 Log() <<
"<a href=\"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf\">" 744 <<
"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf</a>" <<
Endl;
746 else Log() <<
"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf" <<
Endl;
750 Log() <<
"The FDA performance depends on the complexity and fidelity of the" <<
Endl;
751 Log() <<
"user-defined discriminator function. As a general rule, it should" <<
Endl;
752 Log() <<
"be able to reproduce the discrimination power of any linear" <<
Endl;
753 Log() <<
"discriminant analysis. To reach into the nonlinear domain, it is" <<
Endl;
754 Log() <<
"useful to inspect the correlation profiles of the input variables," <<
Endl;
755 Log() <<
"and add quadratic and higher polynomial terms between variables as" <<
Endl;
756 Log() <<
"necessary. Comparison with more involved nonlinear classifiers can" <<
Endl;
757 Log() <<
"be used as a guide." <<
Endl;
761 Log() <<
"Depending on the function used, the choice of \"FitMethod\" is" <<
Endl;
762 Log() <<
"crucial for getting valuable solutions with FDA. As a guideline it" <<
Endl;
763 Log() <<
"is recommended to start with \"FitMethod=MINUIT\". When more complex" <<
Endl;
764 Log() <<
"functions are used where MINUIT does not converge to reasonable" <<
Endl;
765 Log() <<
"results, the user should switch to non-gradient FitMethods such" <<
Endl;
766 Log() <<
"as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" <<
Endl;
767 Log() <<
"useful to combine GA (or MC) with MINUIT by setting the option" <<
Endl;
768 Log() <<
"\"Converger=MINUIT\". GA (MC) will then set the starting parameters" <<
Endl;
769 Log() <<
"for MINUIT such that the basic quality of GA (MC) of finding global" <<
Endl;
770 Log() <<
"minima is combined with the efficacy of MINUIT of finding local" <<
Endl;
void Init(void)
default initialisation
static long int sum(long int i)
Double_t fSumOfWeightsBkg
void DeclareOptions()
define the options (their key words) that can be set in the option string
MsgLogger & Endl(MsgLogger &ml)
void ClearAll()
delete and clear all class members
#define REGISTER_METHOD(CLASS)
for example
Collectable string class.
TString & ReplaceAll(const TString &s1, const TString &s2)
const char * GetName() const
void CheckForUnusedOptions() const
checks for unused options in option string
UInt_t GetNClasses() const
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
UInt_t GetNTargets() const
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not...
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
const char * Data() const
std::vector< Double_t > fBestPars
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Double_t Run()
estimator function interface for fitting
Bool_t IsSignal(const Event *ev) const
std::vector< Interval * > fParRange
Bool_t DoMulticlass() const
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
void SetOptions(const TString &s)
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
const Event * GetEvent() const
char * Form(const char *fmt,...)
IFitterTarget * fConvergerFitter
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
Int_t CountChar(Int_t c) const
Return number of times character c occurs in the string.
void Train(void)
FDA training.
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
virtual Int_t GetSize() const
std::vector< Float_t > * fMulticlassReturnVal
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised) const Double_t sumOfWeights[] = { fSumOfW...
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
DataSetInfo & DataInfo() const
void AddPreDefVal(const T &)
Float_t GetTarget(UInt_t itgt) const
Double_t fSumOfWeightsSig
Bool_t DoRegression() const
Abstract ClassifierFactory template that handles arbitrary types.
const TString & GetOptions() const
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
virtual const std::vector< Float_t > & GetRegressionValues()
virtual const std::vector< Float_t > & GetMulticlassValues()
std::vector< Float_t > * fRegressionReturnVal
void GetHelpMessage() const
get help message text
double norm(double *x, double *p)
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
virtual ~MethodFDA(void)
destructor
Ssiz_t First(char c) const
Find first occurrence of a character c.
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)