131 MethodBase( jobName,
Types::kFisher, methodTitle, dsi, theOption, theTargetDir ),
133 fTheMethod ( "Fisher" ),
134 fFisherMethod ( kFisher ),
154 fTheMethod (
"Fisher" ),
155 fFisherMethod ( kFisher ),
173 fFisherCoeff =
new std::vector<Double_t>( GetNvar() );
176 SetSignalReferenceCut( 0.0 );
191 DeclareOptionRef( fTheMethod =
"Fisher",
"Method",
"Discrimination method" );
192 AddPreDefVal(
TString(
"Fisher"));
193 AddPreDefVal(
TString(
"Mahalanobis"));
201 if (fTheMethod ==
"Fisher" ) fFisherMethod = kFisher;
202 else fFisherMethod = kMahalanobis;
213 if (fBetw ) {
delete fBetw; fBetw = 0; }
214 if (fWith ) {
delete fWith; fWith = 0; }
215 if (fCov ) {
delete fCov; fCov = 0; }
216 if (fDiscrimPow ) {
delete fDiscrimPow; fDiscrimPow = 0; }
217 if (fFisherCoeff) {
delete fFisherCoeff; fFisherCoeff = 0; }
238 GetCov_WithinClass();
241 GetCov_BetweenClass();
263 const Event * ev = GetEvent();
265 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++)
266 result += (*fFisherCoeff)[ivar]*ev->
GetValue(ivar);
269 NoErrorCalc(err, errUpper);
281 fMeanMatx =
new TMatrixD( GetNvar(), 3 );
284 fBetw =
new TMatrixD( GetNvar(), GetNvar() );
285 fWith =
new TMatrixD( GetNvar(), GetNvar() );
286 fCov =
new TMatrixD( GetNvar(), GetNvar() );
289 fDiscrimPow =
new std::vector<Double_t>( GetNvar() );
301 const UInt_t nvar = DataInfo().GetNVariables();
306 for (
UInt_t ivar=0; ivar<nvar; ivar++) { sumS[ivar] = sumB[ivar] = 0; }
309 for (
Int_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
312 const Event * ev = GetEvent(ievt);
316 if (DataInfo().IsSignal(ev)) fSumOfWeightsS += weight;
317 else fSumOfWeightsB += weight;
319 Double_t* sum = DataInfo().IsSignal(ev) ? sumS : sumB;
321 for (
UInt_t ivar=0; ivar<nvar; ivar++) sum[ivar] += ev->
GetValue( ivar )*weight;
324 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
325 (*fMeanMatx)( ivar, 2 ) = sumS[ivar];
326 (*fMeanMatx)( ivar, 0 ) = sumS[ivar]/fSumOfWeightsS;
328 (*fMeanMatx)( ivar, 2 ) += sumB[ivar];
329 (*fMeanMatx)( ivar, 1 ) = sumB[ivar]/fSumOfWeightsB;
332 (*fMeanMatx)( ivar, 2 ) /= (fSumOfWeightsS + fSumOfWeightsB);
347 assert( fSumOfWeightsS > 0 && fSumOfWeightsB > 0 );
352 const Int_t nvar = GetNvar();
353 const Int_t nvar2 = nvar*nvar;
357 memset(sumSig,0,nvar2*
sizeof(
Double_t));
358 memset(sumBgd,0,nvar2*
sizeof(
Double_t));
361 for (
Int_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
364 const Event* ev = GetEvent(ievt);
372 if (DataInfo().IsSignal(ev)) {
373 Double_t v = ( (xval[
x] - (*fMeanMatx)(
x, 0))*(xval[
y] - (*fMeanMatx)(
y, 0)) )*weight;
376 Double_t v = ( (xval[
x] - (*fMeanMatx)(
x, 1))*(xval[
y] - (*fMeanMatx)(
y, 1)) )*weight;
395 (*fWith)(
x,
y) = sumSig[k]/fSumOfWeightsS + sumBgd[k]/fSumOfWeightsB;
413 assert( fSumOfWeightsS > 0 && fSumOfWeightsB > 0);
420 prodSig = ( ((*fMeanMatx)(
x, 0) - (*fMeanMatx)(
x, 2))*
421 ((*fMeanMatx)(
y, 0) - (*fMeanMatx)(
y, 2)) );
422 prodBgd = ( ((*fMeanMatx)(
x, 1) - (*fMeanMatx)(
x, 2))*
423 ((*fMeanMatx)(
y, 1) - (*fMeanMatx)(
y, 2)) );
425 (*fBetw)(
x,
y) = (fSumOfWeightsS*prodSig + fSumOfWeightsB*prodBgd) / (fSumOfWeightsS + fSumOfWeightsB);
437 (*fCov)(
x,
y) = (*fWith)(
x,
y) + (*fBetw)(
x,
y);
453 assert( fSumOfWeightsS > 0 && fSumOfWeightsB > 0);
457 switch (GetFisherMethod()) {
465 Log() <<
kFATAL <<
"<GetFisherCoeff> undefined method" << GetFisherMethod() <<
Endl;
471 Log() <<
kWARNING <<
"<GetFisherCoeff> matrix is almost singular with deterninant="
473 <<
" did you use the variables that are linear combinations or highly correlated?"
478 Log() <<
kFATAL <<
"<GetFisherCoeff> matrix is singular with determinant="
480 <<
" did you use the variables that are linear combinations? \n"
481 <<
" do you any clue as to what went wrong in above printout of the covariance matrix? "
488 Double_t xfact =
TMath::Sqrt( fSumOfWeightsS*fSumOfWeightsB ) / (fSumOfWeightsS + fSumOfWeightsB);
491 std::vector<Double_t> diffMeans( GetNvar() );
493 for (ivar=0; ivar<GetNvar(); ivar++) {
494 (*fFisherCoeff)[ivar] = 0;
496 for (jvar=0; jvar<GetNvar(); jvar++) {
497 Double_t d = (*fMeanMatx)(jvar, 0) - (*fMeanMatx)(jvar, 1);
498 (*fFisherCoeff)[ivar] += invCov(ivar, jvar)*d;
501 (*fFisherCoeff)[ivar] *= xfact;
507 for (ivar=0; ivar<GetNvar(); ivar++){
508 fF0 += (*fFisherCoeff)[ivar]*((*fMeanMatx)(ivar, 0) + (*fMeanMatx)(ivar, 1));
523 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
524 if ((*fCov)(ivar, ivar) != 0)
525 (*fDiscrimPow)[ivar] = (*fBetw)(ivar, ivar)/(*fCov)(ivar, ivar);
527 (*fDiscrimPow)[ivar] = 0;
537 fRanking =
new Ranking( GetName(),
"Discr. power" );
539 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
540 fRanking->AddRank(
Rank( GetInputLabel(ivar), (*fDiscrimPow)[ivar] ) );
552 Log() <<
kINFO <<
"Results for Fisher coefficients:" <<
Endl;
554 if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
555 Log() <<
kINFO <<
"NOTE: The coefficients must be applied to TRANFORMED variables" <<
Endl;
556 Log() <<
kINFO <<
" List of the transformation: " <<
Endl;
557 TListIter trIt(&GetTransformationHandler().GetTransformationList());
562 std::vector<TString> vars;
563 std::vector<Double_t> coeffs;
564 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
565 vars .push_back( GetInputLabel(ivar) );
566 coeffs.push_back( (*fFisherCoeff)[ivar] );
568 vars .push_back(
"(offset)" );
569 coeffs.push_back( fF0 );
575 if (IsNormalised()) {
576 Log() <<
kINFO <<
"NOTE: You have chosen to use the \"Normalise\" booking option. Hence, the" <<
Endl;
577 Log() <<
kINFO <<
" coefficients must be applied to NORMALISED (') variables as follows:" <<
Endl;
579 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++)
if (GetInputLabel(ivar).Length() > maxL) maxL = GetInputLabel(ivar).Length();
582 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
584 << std::setw(maxL+9) <<
TString(
"[") + GetInputLabel(ivar) +
"]' = 2*("
585 << std::setw(maxL+2) <<
TString(
"[") + GetInputLabel(ivar) +
"]"
586 << std::setw(3) << (GetXmin(ivar) > 0 ?
" - " :
" + ")
587 << std::setw(6) <<
TMath::Abs(GetXmin(ivar)) << std::setw(3) <<
")/"
588 << std::setw(6) << (GetXmax(ivar) - GetXmin(ivar) )
589 << std::setw(3) <<
" - 1"
592 Log() <<
kINFO <<
"The TMVA Reader will properly account for this normalisation, but if the" <<
Endl;
593 Log() <<
kINFO <<
"Fisher classifier is applied outside the Reader, the transformation must be" <<
Endl;
594 Log() <<
kINFO <<
"implemented -- or the \"Normalise\" option is removed and Fisher retrained." <<
Endl;
605 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) istr >> (*fFisherCoeff)[ivar];
618 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
621 gTools().
AddAttr( coeffxml,
"Value", (*fFisherCoeff)[ivar] );
632 fFisherCoeff->resize(ncoeff-1);
639 if (coeffidx==0) fF0 = coeff;
640 else (*fFisherCoeff)[coeffidx-1] = coeff;
650 Int_t dp = fout.precision();
651 fout <<
" double fFisher0;" << std::endl;
652 fout <<
" std::vector<double> fFisherCoefficients;" << std::endl;
653 fout <<
"};" << std::endl;
654 fout <<
"" << std::endl;
655 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
656 fout <<
"{" << std::endl;
657 fout <<
" fFisher0 = " << std::setprecision(12) << fF0 <<
";" << std::endl;
658 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
659 fout <<
" fFisherCoefficients.push_back( " << std::setprecision(12) << (*fFisherCoeff)[ivar] <<
" );" << std::endl;
662 fout <<
" // sanity check" << std::endl;
663 fout <<
" if (fFisherCoefficients.size() != fNvars) {" << std::endl;
664 fout <<
" std::cout << \"Problem in class \\\"\" << fClassName << \"\\\"::Initialize: mismatch in number of input values\"" << std::endl;
665 fout <<
" << fFisherCoefficients.size() << \" != \" << fNvars << std::endl;" << std::endl;
666 fout <<
" fStatusIsClean = false;" << std::endl;
667 fout <<
" } " << std::endl;
668 fout <<
"}" << std::endl;
670 fout <<
"inline double " << className <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
671 fout <<
"{" << std::endl;
672 fout <<
" double retval = fFisher0;" << std::endl;
673 fout <<
" for (size_t ivar = 0; ivar < fNvars; ivar++) {" << std::endl;
674 fout <<
" retval += fFisherCoefficients[ivar]*inputValues[ivar];" << std::endl;
675 fout <<
" }" << std::endl;
677 fout <<
" return retval;" << std::endl;
678 fout <<
"}" << std::endl;
680 fout <<
"// Clean up" << std::endl;
681 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
682 fout <<
"{" << std::endl;
683 fout <<
" // clear coefficients" << std::endl;
684 fout <<
" fFisherCoefficients.clear(); " << std::endl;
685 fout <<
"}" << std::endl;
686 fout << std::setprecision(dp);
700 Log() <<
"Fisher discriminants select events by distinguishing the mean " <<
Endl;
701 Log() <<
"values of the signal and background distributions in a trans- " <<
Endl;
702 Log() <<
"formed variable space where linear correlations are removed." <<
Endl;
704 Log() <<
" (More precisely: the \"linear discriminator\" determines" <<
Endl;
705 Log() <<
" an axis in the (correlated) hyperspace of the input " <<
Endl;
706 Log() <<
" variables such that, when projecting the output classes " <<
Endl;
707 Log() <<
" (signal and background) upon this axis, they are pushed " <<
Endl;
708 Log() <<
" as far as possible away from each other, while events" <<
Endl;
709 Log() <<
" of a same class are confined in a close vicinity. The " <<
Endl;
710 Log() <<
" linearity property of this classifier is reflected in the " <<
Endl;
711 Log() <<
" metric with which \"far apart\" and \"close vicinity\" are " <<
Endl;
712 Log() <<
" determined: the covariance matrix of the discriminating" <<
Endl;
713 Log() <<
" variable space.)" <<
Endl;
717 Log() <<
"Optimal performance for Fisher discriminants is obtained for " <<
Endl;
718 Log() <<
"linearly correlated Gaussian-distributed variables. Any deviation" <<
Endl;
719 Log() <<
"from this ideal reduces the achievable separation power. In " <<
Endl;
720 Log() <<
"particular, no discrimination at all is achieved for a variable" <<
Endl;
721 Log() <<
"that has the same sample mean for signal and background, even if " <<
Endl;
722 Log() <<
"the shapes of the distributions are very different. Thus, Fisher " <<
Endl;
723 Log() <<
"discriminants often benefit from suitable transformations of the " <<
Endl;
724 Log() <<
"input variables. For example, if a variable x in [-1,1] has a " <<
Endl;
725 Log() <<
"a parabolic signal distributions, and a uniform background" <<
Endl;
726 Log() <<
"distributions, their mean value is zero in both cases, leading " <<
Endl;
727 Log() <<
"to no separation. The simple transformation x -> |x| renders this " <<
Endl;
728 Log() <<
"variable powerful for the use in a Fisher discriminant." <<
Endl;
void GetCov_BetweenClass(void)
the matrix of covariance 'between class' reflects the dispersion of the events of a class relative to...
const Ranking * CreateRanking()
computes ranking of input variables
void MakeClassSpecific(std::ostream &, const TString &) const
write Fisher-specific classifier response
MsgLogger & Endl(MsgLogger &ml)
void ReadWeightsFromStream(std::istream &i)
read Fisher coefficients from weight file
virtual ~MethodFisher(void)
destructor
void AddWeightsXMLTo(void *parent) const
create XML description of Fisher classifier
void GetCov_Full(void)
compute full covariance matrix from sum of within and between matrices
void GetDiscrimPower(void)
computation of discrimination power indicator for each variable small values of "fWith" indicates lit...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Fisher can only handle classification with 2 classes.
MethodFisher(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="Fisher", TDirectory *theTargetDir=0)
standard constructor for the "Fisher"
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
std::vector< std::vector< double > > Data
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
TMatrixT< Double_t > TMatrixD
void Print(Option_t *name="") const
Print the matrix as a table of elements.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns the Fisher value (no fixed range)
void Train(void)
computation of Fisher coefficients by series of matrix operations
void ReadWeightsFromXML(void *wghtnode)
read Fisher coefficients from xml weight file
void ProcessOptions()
process user options
ClassImp(TMVA::MethodFisher)
void InitMatrices(void)
initialization method; creates global matrices and vectors
void GetHelpMessage() const
get help message text
void DeclareOptions()
MethodFisher options: format and syntax of option string: "type" where type is "Fisher" or "Mahalanob...
void GetMean(void)
compute mean values of variables in each sample, and the overall means
void GetCov_WithinClass(void)
the matrix of covariance 'within class' reflects the dispersion of the events relative to the center ...
Describe directory structure in memory.
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
virtual Double_t Determinant() const
Return the matrix determinant.
void PrintCoefficients(void)
display Fisher coefficients and discriminating power for each variable check maximum length of variab...
Double_t Sqrt(Double_t x)
void GetFisherCoeff(void)
Fisher = Sum { [coeff]*[variables] }.
void Init(void)
default initialization called by all constructors