59 TMVA::MethodLD::MethodLD( const
TString& jobName,
64 MethodBase( jobName, Types::kLD, methodTitle, dsi, theOption, theTargetDir ),
91 if(DataInfo().GetNTargets()!=0) fNRegOut = DataInfo().GetNTargets();
94 fLDCoeff =
new vector< vector< Double_t >* >(fNRegOut);
95 for (
Int_t iout = 0; iout<fNRegOut; iout++){
96 (*fLDCoeff)[iout] =
new std::vector<Double_t>( GetNvar()+1 );
100 SetSignalReferenceCut( 0.0 );
108 if (fSumMatx) {
delete fSumMatx; fSumMatx = 0; }
109 if (fSumValMatx) {
delete fSumValMatx; fSumValMatx = 0; }
110 if (fCoeffMatx) {
delete fCoeffMatx; fCoeffMatx = 0; }
112 for (vector< vector< Double_t >* >::iterator vi=fLDCoeff->begin(); vi!=fLDCoeff->end(); vi++){
113 if (*vi) {
delete *vi; *vi = 0; }
115 delete fLDCoeff; fLDCoeff = 0;
126 Log() <<
"regression with " << numberTargets <<
" targets.";
155 const Event* ev = GetEvent();
157 if (fRegressionReturnVal ==
NULL) fRegressionReturnVal =
new vector< Float_t >();
158 fRegressionReturnVal->resize( fNRegOut );
160 for (
Int_t iout = 0; iout<fNRegOut; iout++) {
161 (*fRegressionReturnVal)[iout] = (*(*fLDCoeff)[iout])[0] ;
164 for (std::vector<Float_t>::const_iterator it = ev->
GetValues().begin();it!=ev->
GetValues().end();++it){
165 (*fRegressionReturnVal)[iout] += (*(*fLDCoeff)[iout])[++icoeff] * (*it);
170 NoErrorCalc(err, errUpper);
172 return (*fRegressionReturnVal)[0];
180 const Event* ev = GetEvent();
182 if (fRegressionReturnVal ==
NULL) fRegressionReturnVal =
new vector< Float_t >();
183 fRegressionReturnVal->resize( fNRegOut );
185 for (
Int_t iout = 0; iout<fNRegOut; iout++) {
186 (*fRegressionReturnVal)[iout] = (*(*fLDCoeff)[iout])[0] ;
189 for (std::vector<Float_t>::const_iterator it = ev->
GetValues().begin();it!=ev->
GetValues().end();++it){
190 (*fRegressionReturnVal)[iout] += (*(*fLDCoeff)[iout])[++icoeff] * (*it);
196 for (
Int_t iout = 0; iout<fNRegOut; iout++) evT->
SetTarget(iout,(*fRegressionReturnVal)[iout]);
198 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
199 fRegressionReturnVal->clear();
200 for (
Int_t iout = 0; iout<fNRegOut; iout++) fRegressionReturnVal->push_back(evT2->
GetTarget(iout));
203 return (*fRegressionReturnVal);
211 fSumMatx =
new TMatrixD( GetNvar()+1, GetNvar()+1 );
212 fSumValMatx =
new TMatrixD( GetNvar()+1, fNRegOut );
213 fCoeffMatx =
new TMatrixD( GetNvar()+1, fNRegOut );
223 const UInt_t nvar = DataInfo().GetNVariables();
225 for (
UInt_t ivar = 0; ivar<=nvar; ivar++){
226 for (
UInt_t jvar = 0; jvar<=nvar; jvar++) (*fSumMatx)( ivar, jvar ) = 0;
231 for (
Int_t ievt=0; ievt<nevts; ievt++) {
232 const Event * ev = GetEvent(ievt);
235 if (IgnoreEventsWithNegWeightsInTraining() && weight <= 0)
continue;
238 (*fSumMatx)( 0, 0 ) += weight;
241 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
242 (*fSumMatx)( ivar+1, 0 ) += ev->
GetValue( ivar ) * weight;
243 (*fSumMatx)( 0, ivar+1 ) += ev->
GetValue( ivar ) * weight;
247 for (
UInt_t ivar=0; ivar<nvar; ivar++){
248 for (
UInt_t jvar=0; jvar<nvar; jvar++){
249 (*fSumMatx)( ivar+1, jvar+1 ) += ev->
GetValue( ivar ) * ev->
GetValue( jvar ) * weight;
260 const UInt_t nvar = DataInfo().GetNVariables();
262 for (
Int_t ivar = 0; ivar<fNRegOut; ivar++){
263 for (
UInt_t jvar = 0; jvar<=nvar; jvar++){
264 (*fSumValMatx)(jvar,ivar) = 0;
269 for (
Int_t ievt=0; ievt<
Data()->GetNEvents(); ievt++) {
272 const Event* ev = GetEvent(ievt);
276 if (IgnoreEventsWithNegWeightsInTraining() && weight <= 0)
continue;
278 for (
Int_t ivar=0; ivar<fNRegOut; ivar++) {
282 if (!DoRegression()){
283 val *= DataInfo().IsSignal(ev);
287 (*fSumValMatx)( 0,ivar ) += val;
288 for (
UInt_t jvar=0; jvar<nvar; jvar++) {
289 (*fSumValMatx)(jvar+1,ivar ) += ev->
GetValue(jvar) * val;
300 const UInt_t nvar = DataInfo().GetNVariables();
302 for (
Int_t ivar = 0; ivar<fNRegOut; ivar++){
305 Log() <<
kWARNING <<
"<GetCoeff> matrix is almost singular with determinant="
307 <<
" did you use the variables that are linear combinations or highly correlated?"
311 Log() <<
kFATAL <<
"<GetCoeff> matrix is singular with determinant="
313 <<
" did you use the variables that are linear combinations?"
318 fCoeffMatx =
new TMatrixD( invSum * (*fSumValMatx));
319 for (
UInt_t jvar = 0; jvar<nvar+1; jvar++) {
320 (*(*fLDCoeff)[ivar])[jvar] = (*fCoeffMatx)(jvar, ivar );
322 if (!DoRegression()) {
323 (*(*fLDCoeff)[ivar])[0]=0.0;
324 for (
UInt_t jvar = 1; jvar<nvar+1; jvar++){
325 (*(*fLDCoeff)[ivar])[0]+=(*fCoeffMatx)(jvar,ivar)*(*fSumMatx)(0,jvar)/(*fSumMatx)( 0, 0 );
327 (*(*fLDCoeff)[ivar])[0]/=-2.0;
338 for (
Int_t iout=0; iout<fNRegOut; iout++){
339 for (
UInt_t icoeff=0; icoeff<GetNvar()+1; icoeff++){
340 istr >> (*(*fLDCoeff)[iout])[icoeff];
354 for (
Int_t iout=0; iout<fNRegOut; iout++) {
355 for (
UInt_t icoeff=0; icoeff<GetNvar()+1; icoeff++) {
359 gTools().
AddAttr( coeffxml,
"Value", (*(*fLDCoeff)[iout])[icoeff] );
374 if (ncoeff != GetNvar()+1)
Log() <<
kFATAL <<
"Mismatch in number of output variables/coefficients: "
375 << ncoeff <<
" != " << GetNvar()+1 <<
Endl;
379 for (vector< vector< Double_t >* >::iterator vi=fLDCoeff->begin(); vi!=fLDCoeff->end(); vi++){
380 if (*vi) {
delete *vi; *vi = 0; }
382 delete fLDCoeff; fLDCoeff = 0;
384 fLDCoeff =
new vector< vector< Double_t >* >(fNRegOut);
385 for (
Int_t ivar = 0; ivar<fNRegOut; ivar++) (*fLDCoeff)[ivar] = new std::vector<Double_t>( ncoeff );
395 (*(*fLDCoeff)[iout])[icoeff] = coeff;
406 fout <<
" std::vector<double> fLDCoefficients;" << std::endl;
407 fout <<
"};" << std::endl;
408 fout <<
"" << std::endl;
409 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
410 fout <<
"{" << std::endl;
411 for (
UInt_t ivar=0; ivar<GetNvar()+1; ivar++) {
412 Int_t dp = fout.precision();
413 fout <<
" fLDCoefficients.push_back( "
414 << std::setprecision(12) << (*(*fLDCoeff)[0])[ivar]
415 << std::setprecision(dp) <<
" );" << std::endl;
418 fout <<
" // sanity check" << std::endl;
419 fout <<
" if (fLDCoefficients.size() != fNvars+1) {" << std::endl;
420 fout <<
" std::cout << \"Problem in class \\\"\" << fClassName << \"\\\"::Initialize: mismatch in number of input values\"" << std::endl;
421 fout <<
" << fLDCoefficients.size() << \" != \" << fNvars+1 << std::endl;" << std::endl;
422 fout <<
" fStatusIsClean = false;" << std::endl;
423 fout <<
" } " << std::endl;
424 fout <<
"}" << std::endl;
426 fout <<
"inline double " << className <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
427 fout <<
"{" << std::endl;
428 fout <<
" double retval = fLDCoefficients[0];" << std::endl;
429 fout <<
" for (size_t ivar = 1; ivar < fNvars+1; ivar++) {" << std::endl;
430 fout <<
" retval += fLDCoefficients[ivar]*inputValues[ivar-1];" << std::endl;
431 fout <<
" }" << std::endl;
433 fout <<
" return retval;" << std::endl;
434 fout <<
"}" << std::endl;
436 fout <<
"// Clean up" << std::endl;
437 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
438 fout <<
"{" << std::endl;
439 fout <<
" // clear coefficients" << std::endl;
440 fout <<
" fLDCoefficients.clear(); " << std::endl;
441 fout <<
"}" << std::endl;
449 fRanking =
new Ranking( GetName(),
"Discr. power" );
451 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
452 fRanking->AddRank(
Rank( GetInputLabel(ivar),
TMath::Abs((* (*fLDCoeff)[0])[ivar+1] )) );
471 if (HasTrainingTree()) InitMatrices();
479 Log() <<
kINFO <<
"Results for LD coefficients:" <<
Endl;
481 if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
482 Log() <<
kINFO <<
"NOTE: The coefficients must be applied to TRANFORMED variables" <<
Endl;
483 Log() <<
kINFO <<
" List of the transformation: " <<
Endl;
484 TListIter trIt(&GetTransformationHandler().GetTransformationList());
489 std::vector<TString> vars;
490 std::vector<Double_t> coeffs;
491 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
492 vars .push_back( GetInputLabel(ivar) );
493 coeffs.push_back( (* (*fLDCoeff)[0])[ivar+1] );
495 vars .push_back(
"(offset)" );
496 coeffs.push_back((* (*fLDCoeff)[0])[0] );
498 if (IsNormalised()) {
499 Log() <<
kINFO <<
"NOTE: You have chosen to use the \"Normalise\" booking option. Hence, the" <<
Endl;
500 Log() <<
kINFO <<
" coefficients must be applied to NORMALISED (') variables as follows:" <<
Endl;
502 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++)
if (GetInputLabel(ivar).Length() > maxL) maxL = GetInputLabel(ivar).Length();
505 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
507 << std::setw(maxL+9) <<
TString(
"[") + GetInputLabel(ivar) +
"]' = 2*("
508 << std::setw(maxL+2) <<
TString(
"[") + GetInputLabel(ivar) +
"]"
509 << std::setw(3) << (GetXmin(ivar) > 0 ?
" - " :
" + ")
510 << std::setw(6) <<
TMath::Abs(GetXmin(ivar)) << std::setw(3) <<
")/"
511 << std::setw(6) << (GetXmax(ivar) - GetXmin(ivar) )
512 << std::setw(3) <<
" - 1"
515 Log() <<
kINFO <<
"The TMVA Reader will properly account for this normalisation, but if the" <<
Endl;
516 Log() <<
kINFO <<
"LD classifier is applied outside the Reader, the transformation must be" <<
Endl;
517 Log() <<
kINFO <<
"implemented -- or the \"Normalise\" option is removed and LD retrained." <<
Endl;
533 Log() <<
"Linear discriminants select events by distinguishing the mean " <<
Endl;
534 Log() <<
"values of the signal and background distributions in a trans- " <<
Endl;
535 Log() <<
"formed variable space where linear correlations are removed." <<
Endl;
536 Log() <<
"The LD implementation here is equivalent to the \"Fisher\" discriminant" <<
Endl;
537 Log() <<
"for classification, but also provides linear regression." <<
Endl;
539 Log() <<
" (More precisely: the \"linear discriminator\" determines" <<
Endl;
540 Log() <<
" an axis in the (correlated) hyperspace of the input " <<
Endl;
541 Log() <<
" variables such that, when projecting the output classes " <<
Endl;
542 Log() <<
" (signal and background) upon this axis, they are pushed " <<
Endl;
543 Log() <<
" as far as possible away from each other, while events" <<
Endl;
544 Log() <<
" of a same class are confined in a close vicinity. The " <<
Endl;
545 Log() <<
" linearity property of this classifier is reflected in the " <<
Endl;
546 Log() <<
" metric with which \"far apart\" and \"close vicinity\" are " <<
Endl;
547 Log() <<
" determined: the covariance matrix of the discriminating" <<
Endl;
548 Log() <<
" variable space.)" <<
Endl;
552 Log() <<
"Optimal performance for the linear discriminant is obtained for " <<
Endl;
553 Log() <<
"linearly correlated Gaussian-distributed variables. Any deviation" <<
Endl;
554 Log() <<
"from this ideal reduces the achievable separation power. In " <<
Endl;
555 Log() <<
"particular, no discrimination at all is achieved for a variable" <<
Endl;
556 Log() <<
"that has the same sample mean for signal and background, even if " <<
Endl;
557 Log() <<
"the shapes of the distributions are very different. Thus, the linear " <<
Endl;
558 Log() <<
"discriminant often benefits from a suitable transformation of the " <<
Endl;
559 Log() <<
"input variables. For example, if a variable x in [-1,1] has a " <<
Endl;
560 Log() <<
"a parabolic signal distributions, and a uniform background" <<
Endl;
561 Log() <<
"distributions, their mean value is zero in both cases, leading " <<
Endl;
562 Log() <<
"to no separation. The simple transformation x -> |x| renders this " <<
Endl;
563 Log() <<
"variable powerful for the use in a linear discriminant." <<
Endl;
virtual const std::vector< Float_t > & GetRegressionValues()
Calculates the regression output.
const Ranking * CreateRanking()
computes ranking of input variables
MsgLogger & Endl(MsgLogger &ml)
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
LD can handle classification with 2 classes and regression with one regression-target.
void GetSumVal(void)
Calculates the vector transposed(X)*W*Y with Y being the target vector.
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not
virtual ~MethodLD(void)
destructor
void PrintCoefficients(void)
Display the classification/regression coefficients for each variable.
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
ClassImp(TMVA::MethodLD) TMVA
standard constructor for the LD
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Returns the MVA classification output.
std::vector< std::vector< double > > Data
MethodLD(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="LD", TDirectory *theTargetDir=0)
void MakeClassSpecific(std::ostream &, const TString &) const
write LD-specific classifier response
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
TMatrixT< Double_t > TMatrixD
void ReadWeightsFromStream(std::istream &i)
read LD coefficients from weight file
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
void DeclareOptions()
MethodLD options.
Describe directory structure in memory.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
void GetSum(void)
Calculates the matrix transposed(X)*W*X with W being the diagonal weight matrix and X the coordinates...
Float_t GetTarget(UInt_t itgt) const
#define REGISTER_METHOD(CLASS)
for example
std::vector< Float_t > & GetValues()
void GetLDCoeff(void)
Calculates the coefficients used for classification/regression.
virtual Double_t Determinant() const
Return the matrix determinant.
void Train(void)
compute fSumMatx
void InitMatrices(void)
Initialization method; creates global matrices and vectors.
void Init(void)
default initialization called by all constructors
void GetHelpMessage() const
get help message text
void ProcessOptions()
this is the preparation for training