#include "TMVA/ClassifierFactory.h"
#include "TMVA/MethodSVM.h"
#include "TMVA/Tools.h"
#include "TMVA/Timer.h"
#include "TMVA/SVWorkingSet.h"
#include "TMVA/SVEvent.h"
#include "TMVA/SVKernelFunction.h"

REGISTER_METHOD(SVM)

ClassImp(TMVA::MethodSVM)

TMVA::MethodSVM::MethodSVM( const TString& jobName, const TString& methodTitle, DataSetInfo& theData,
                            const TString& theOption, TDirectory* theTargetDir )
   : MethodBase( jobName, Types::kSVM, methodTitle, theData, theOption, theTargetDir )
   , fSVKernelFunction(0)
   , fDoubleSigmaSquared(0)
{
   // standard constructor
}

TMVA::MethodSVM::MethodSVM( DataSetInfo& theData, const TString& theWeightFile, TDirectory* theTargetDir )
   : MethodBase( Types::kSVM, theData, theWeightFile, theTargetDir )
   , fSVKernelFunction(0)
   , fDoubleSigmaSquared(0)
{
   // constructor from weight file
}

TMVA::MethodSVM::~MethodSVM()
{
   // destructor
   if (fInputData != 0)        { delete fInputData; fInputData = 0; }
   if (fSupportVectors != 0)   { delete fSupportVectors; fSupportVectors = 0; }
   if (fWgSet != 0)            { delete fWgSet; fWgSet = 0; }
   if (fSVKernelFunction != 0) { delete fSVKernelFunction; fSVKernelFunction = 0; }
}

void TMVA::MethodSVM::Init()
{
   // default initialisation
   SetNormalised( kTRUE );
   fInputData      = new std::vector<TMVA::SVEvent*>(0);
   fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
}

void TMVA::MethodSVM::DeclareOptions()
{
   // declare options available for this method
   DeclareOptionRef( fGamma = 1., "Gamma", "RBF kernel parameter: Gamma (size of the Kernel)" );
   DeclareOptionRef( fCost, "C", "Cost parameter" );
   if (DoRegression()) fCost = 0.002;   // regression uses a smaller default cost
   else                fCost = 1.;
   DeclareOptionRef( fTolerance = 0.01, "Tol",     "Tolerance parameter" );
   DeclareOptionRef( fMaxIter   = 1000, "MaxIter", "Maximum number of training loops" );
}

void TMVA::MethodSVM::DeclareCompatibilityOptions()
{
   // options that are used ONLY for the READER to ensure backward compatibility
   MethodBase::DeclareCompatibilityOptions();
   DeclareOptionRef( fNSubSets = 1, "NSubSets", "Number of training subsets" );
   DeclareOptionRef( fTheKernel = "Gauss", "Kernel", "Uses kernel function" );
   // for gaussian kernel parameter(s)
   DeclareOptionRef( fDoubleSigmaSquared = 2., "Sigma", "Kernel parameter: sigma" );
   // for polynomial kernel parameter(s)
   DeclareOptionRef( fOrder = 3, "Order", "Polynomial Kernel parameter: polynomial order" );
   // for sigmoid kernel parameters
   DeclareOptionRef( fTheta = 1., "Theta", "Sigmoid Kernel parameter: theta" );
   DeclareOptionRef( fKappa = 1., "Kappa", "Sigmoid Kernel parameter: kappa" );
}

void TMVA::MethodSVM::ProcessOptions()
{
   // option post processing (if necessary)
   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
            << GetMethodTypeName()
            << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string." << Endl;
   }
}

void TMVA::MethodSVM::Train()
{
   // Train SVM
   for (Int_t ievt = 0; ievt < Data()->GetNEvents(); ievt++) {
      if (GetEvent(ievt)->GetWeight() != 0)
         fInputData->push_back( new SVEvent(GetEvent(ievt), fCost, DataInfo().IsSignal(GetEvent(ievt))) );
   }

   fSVKernelFunction = new SVKernelFunction( fGamma );   // RBF kernel with the configured Gamma

   Log() << kINFO << "Building SVM Working Set...with " << fInputData->size() << " event instances" << Endl;
   Timer bldwstime( GetName() );
   fWgSet = new SVWorkingSet( fInputData, fSVKernelFunction, fTolerance, DoRegression() );
   Log() << kINFO << "Elapsed time for Working Set build: " << bldwstime.GetElapsedTime() << Endl;

   Log() << kINFO << "Sorry, no computing time forecast available for SVM, please wait ..." << Endl;
   fWgSet->Train(fMaxIter);

   fBparm          = fWgSet->GetBpar();
   fSupportVectors = fWgSet->GetSupportVectors();
}
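
// After training, the model is fully specified by the support vectors x_i, their
// Lagrange multipliers alpha_i with class labels y_i = +-1, and the bias b from
// GetBpar(): classification evaluates f(x) = sum_i alpha_i y_i K(x_i, x) - b,
// exactly the sum computed in GetMvaValue() below.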

void TMVA::MethodSVM::AddWeightsXMLTo( void* parent ) const
{
   // write configuration to xml file
   void* wght = gTools().AddChild(parent, "Weights");
   gTools().AddAttr(wght,"fBparm",fBparm);
   gTools().AddAttr(wght,"fGamma",fGamma);
   gTools().AddAttr(wght,"NSupVec",fSupportVectors->size());
   for (std::vector<TMVA::SVEvent*>::iterator veciter=fSupportVectors->begin();
        veciter!=fSupportVectors->end(); ++veciter) {
      TVectorD temp(GetNvar()+4);
      temp[0] = (*veciter)->GetNs();
      temp[1] = (*veciter)->GetTypeFlag();
      temp[2] = (*veciter)->GetAlpha();
      temp[3] = (*veciter)->GetAlpha_p();
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
         temp[ivar+4] = (*(*veciter)->GetDataVector())[ivar];
      gTools().WriteTVectorDToXML(wght,"SupportVector",&temp);
   }
   // write max/min data values
   void* maxnode = gTools().AddChild(wght, "Maxima");
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().AddAttr(maxnode, "Var"+gTools().StringFromInt(ivar), GetXmax(ivar));
   void* minnode = gTools().AddChild(wght, "Minima");
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().AddAttr(minnode, "Var"+gTools().StringFromInt(ivar), GetXmin(ivar));
}
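
// Sketch of the resulting <Weights> node (element and attribute names as written
// above; the exact TVectorD serialisation is handled by gTools(), so the
// SupportVector payload shown here is schematic):
//
//    <Weights fBparm="..." fGamma="..." NSupVec="...">
//       <SupportVector .../>  <!-- Ns, typeFlag, alpha, alpha_p, then the input values -->
//       ...
//       <Maxima Var0="..." Var1="..."/>
//       <Minima Var0="..." Var1="..."/>
//    </Weights>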

void TMVA::MethodSVM::ReadWeightsFromXML( void* wghtnode )
{
   gTools().ReadAttr( wghtnode, "fBparm",  fBparm );
   gTools().ReadAttr( wghtnode, "fGamma",  fGamma );
   gTools().ReadAttr( wghtnode, "NSupVec", fNsupv );

   Float_t alpha   = 0.;
   Float_t alpha_p = 0.;
   Int_t typeFlag  = -1;
   std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());

   if (fMaxVars != 0) delete fMaxVars;
   fMaxVars = new TVectorD( GetNvar() );
   if (fMinVars != 0) delete fMinVars;
   fMinVars = new TVectorD( GetNvar() );
   if (fSupportVectors != 0) {
      for (std::vector<SVEvent*>::iterator it = fSupportVectors->begin(); it != fSupportVectors->end(); ++it)
         delete *it;
      delete fSupportVectors;
   }
   fSupportVectors = new std::vector<TMVA::SVEvent*>(0);

   void* supportvectornode = gTools().GetChild(wghtnode);
   for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
      TVectorD temp(GetNvar()+4);
      gTools().ReadTVectorDFromXML(supportvectornode,"SupportVector",&temp);
      typeFlag = (int)temp[1];
      alpha    = temp[2];
      alpha_p  = temp[3];
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) (*svector)[ivar] = temp[ivar+4];
      fSupportVectors->push_back(new SVEvent(svector,alpha,alpha_p,typeFlag));
      supportvectornode = gTools().GetNextChild(supportvectornode);
   }

   void* maxminnode = supportvectornode;
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().ReadAttr( maxminnode, "Var"+gTools().StringFromInt(ivar), (*fMaxVars)[ivar] );
   maxminnode = gTools().GetNextChild(maxminnode);
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().ReadAttr( maxminnode, "Var"+gTools().StringFromInt(ivar), (*fMinVars)[ivar] );

   if (fSVKernelFunction != 0) delete fSVKernelFunction;
   fSVKernelFunction = new SVKernelFunction( fGamma );
   delete svector;
}

void TMVA::MethodSVM::ReadWeightsFromStream( std::istream& istr )
{
   if (fSupportVectors != 0) { delete fSupportVectors; fSupportVectors = 0; }
   fSupportVectors = new std::vector<TMVA::SVEvent*>(0);

   // read configuration from input stream
   istr >> fBparm;

   UInt_t fNsupv;
   istr >> fNsupv;
   fSupportVectors->reserve(fNsupv);

   Float_t typeTalpha = 0.;
   Float_t alpha      = 0.;
   Int_t   typeFlag   = -1;
   UInt_t  ns         = 0;
   std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());

   fMaxVars = new TVectorD( GetNvar() );
   fMinVars = new TVectorD( GetNvar() );

   for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
      istr >> ns;
      istr >> typeTalpha;
      typeFlag = typeTalpha < 0 ? -1 : 1;              // sign encodes the class
      alpha    = typeTalpha < 0 ? -typeTalpha : typeTalpha;
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> svector->at(ivar);
      fSupportVectors->push_back(new SVEvent(svector,alpha,typeFlag,ns));
   }

   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMaxVars)[ivar];
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMinVars)[ivar];

   delete fSVKernelFunction;
   if (fTheKernel == "Gauss") {
      fSVKernelFunction = new SVKernelFunction( 1/fDoubleSigmaSquared );
   }
   else {
      SVKernelFunction::EKernelType k = SVKernelFunction::kLinear;
      if      (fTheKernel == "Polynomial") k = SVKernelFunction::kPolynomial;
      else if (fTheKernel == "Sigmoid")    k = SVKernelFunction::kSigmoidal;
      else Log() << kFATAL << "Unknown kernel function found in weight file!" << Endl;
      fSVKernelFunction = new SVKernelFunction();
      fSVKernelFunction->setCompatibilityParams(k, fOrder, fTheta, fKappa);
   }
   delete svector;
}

Double_t TMVA::MethodSVM::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   // returns MVA value for given event
   Double_t myMVA = 0;
   SVEvent* ev = new SVEvent( GetEvent(), 0. );
   for (UInt_t ievt = 0; ievt < fSupportVectors->size(); ievt++) {
      myMVA += ( fSupportVectors->at(ievt)->GetAlpha()
                 * fSupportVectors->at(ievt)->GetTypeFlag()
                 * fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
   }
   delete ev;
   myMVA -= fBparm;
   NoErrorCalc(err, errUpper);   // cannot determine error
   return 1.0/(1.0 + TMath::Exp(myMVA));
}
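
// The loop above is the kernel expansion f(x) = sum_i alpha_i y_i K(x_i, x) - b.
// With the Gaussian (RBF) kernel it reduces to the standalone form sketched below;
// RbfDecision and its arguments are hypothetical names for illustration, not TMVA API:
//
//    double RbfDecision( const std::vector<double>& x,                   // event to classify
//                        const std::vector<std::vector<double> >& sv,    // support vectors
//                        const std::vector<double>& alphaY,              // alpha_i * y_i
//                        double gamma, double b )
//    {
//       double f = -b;
//       for (size_t i = 0; i < sv.size(); ++i) {
//          double d2 = 0;
//          for (size_t k = 0; k < x.size(); ++k) {
//             const double d = sv[i][k] - x[k];
//             d2 += d*d;                                // |x_i - x|^2
//          }
//          f += alphaY[i] * std::exp( -gamma * d2 );    // K(x_i,x) = exp(-gamma*|x_i-x|^2)
//       }
//       return 1.0/(1.0 + std::exp(f));                 // same logistic mapping as above
//    }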

const std::vector<Float_t>& TMVA::MethodSVM::GetRegressionValues()
{
   if (fRegressionReturnVal == NULL)
      fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();

   Double_t myMVA = 0;
   const Event* baseev = GetEvent();
   SVEvent* ev = new SVEvent( baseev, 0. );

   for (UInt_t ievt = 0; ievt < fSupportVectors->size(); ievt++) {
      myMVA += ( fSupportVectors->at(ievt)->GetDeltaAlpha()
                 * fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
   }
   myMVA += fBparm;

   Event* evT = new Event(*baseev);
   evT->SetTarget(0, myMVA);
   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
   fRegressionReturnVal->push_back( evT2->GetTarget(0) );

   delete evT;
   delete ev;
   return *fRegressionReturnVal;
}
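
// For regression the expansion uses the difference of the two multiplier sets per
// support vector (GetDeltaAlpha() above, corresponding to alpha_i - alpha*_i in the
// usual epsilon-SVR notation): f(x) = sum_i (alpha_i - alpha*_i) K(x_i, x) + b. The
// raw prediction is then mapped back to the original target range by the inverse
// variable transformation.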

void TMVA::MethodSVM::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   // write specific classifier response
   const int fNsupv = fSupportVectors->size();
   fout << "   // not implemented for class: \"" << className << "\"" << std::endl;
   fout << "   float        fBparameter;" << std::endl;
   fout << "   int          fNOfSuppVec;" << std::endl;
   fout << "   static float fAllSuppVectors[][" << fNsupv << "];" << std::endl;
   fout << "   static float fAlphaTypeCoef[" << fNsupv << "];" << std::endl;
   fout << "   // Kernel parameter(s) " << std::endl;
   fout << "   float fGamma;" << std::endl;
   fout << "};" << std::endl;
   fout << "" << std::endl;

   // write MVA function
   fout << "inline void " << className << "::Initialize() " << std::endl;
   fout << "{" << std::endl;
   fout << "   fBparameter = " << fBparm << ";" << std::endl;
   fout << "   fNOfSuppVec = " << fNsupv << ";" << std::endl;
   fout << "   fGamma = " << fGamma << ";" << std::endl;
   fout << "}" << std::endl;

   fout << "inline double " << className << "::GetMvaValue__(const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   double mvaval = 0; " << std::endl;
   fout << "   double temp = 0; " << std::endl;
   fout << "   for (int ievt = 0; ievt < fNOfSuppVec; ievt++ ){" << std::endl;
   fout << "      temp = 0;" << std::endl;
   fout << "      for ( unsigned int ivar = 0; ivar < GetNvar(); ivar++ ) {" << std::endl;
   fout << "         temp += (fAllSuppVectors[ivar][ievt] - inputValues[ivar]) " << std::endl;
   fout << "               * (fAllSuppVectors[ivar][ievt] - inputValues[ivar]); " << std::endl;
   fout << "      }" << std::endl;
   fout << "      mvaval += fAlphaTypeCoef[ievt] * exp( -fGamma * temp ); " << std::endl;
   fout << "   }" << std::endl;
   fout << "   mvaval -= fBparameter;" << std::endl;
   fout << "   return 1./(1. + exp(mvaval));" << std::endl;
   fout << "}" << std::endl;
   fout << "// Clean up" << std::endl;
   fout << "inline void " << className << "::Clear() " << std::endl;
   fout << "{" << std::endl;
   fout << "   // nothing to clear " << std::endl;
   fout << "}" << std::endl;
   fout << "" << std::endl;

   // write array with support vectors
   fout << "float " << className << "::fAlphaTypeCoef[] =" << std::endl;
   fout << "{ ";
   for (Int_t isv = 0; isv < fNsupv; isv++) {
      fout << fSupportVectors->at(isv)->GetDeltaAlpha() * fSupportVectors->at(isv)->GetTypeFlag();
      if (isv < fNsupv-1) fout << ", ";
   }
   fout << " };" << std::endl << std::endl;

   fout << "float " << className << "::fAllSuppVectors[][" << fNsupv << "] =" << std::endl;
   fout << "{";
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
      fout << std::endl << "   { ";
      for (Int_t isv = 0; isv < fNsupv; isv++) {
         fout << fSupportVectors->at(isv)->GetDataVector()->at(ivar);
         if (isv < fNsupv-1) fout << ", ";
      }
      fout << " }";
      if (ivar < GetNvar()-1) fout << ", " << std::endl;
      else                    fout << std::endl;
   }
   fout << "};" << std::endl << std::endl;
}

void TMVA::MethodSVM::GetHelpMessage() const
{
   // get help message text
   Log() << Endl;
   Log() << "The Support Vector Machine (SVM) builds a hyperplane separating" << Endl;
   Log() << "signal and background events (vectors) using the minimal subset of" << Endl;
   Log() << "all vectors used for training (support vectors). The extension to" << Endl;
   Log() << "the non-linear case is performed by mapping input vectors into a" << Endl;
   Log() << "higher-dimensional feature space in which linear separation is" << Endl;
   Log() << "possible. The use of the kernel functions thereby eliminates the" << Endl;
   Log() << "explicit transformation to the feature space. The implemented SVM" << Endl;
   Log() << "algorithm performs the classification tasks using linear, polynomial," << Endl;
   Log() << "Gaussian and sigmoidal kernel functions. The Gaussian kernel allows" << Endl;
   Log() << "one to apply any discriminant shape in the input space." << Endl;
   Log() << Endl;
   Log() << "SVM is a general purpose non-linear classification method, which" << Endl;
   Log() << "does not require data preprocessing like decorrelation or Principal" << Endl;
   Log() << "Component Analysis. It generalises quite well and can handle analyses" << Endl;
   Log() << "with large numbers of input variables." << Endl;
   Log() << Endl;
   Log() << "Optimal performance requires primarily a proper choice of the kernel" << Endl;
   Log() << "parameters (the width \"Sigma\" in case of the Gaussian kernel) and the" << Endl;
   Log() << "cost parameter \"C\". The user must optimise them empirically by running" << Endl;
   Log() << "SVM several times with different parameter sets. The time needed for" << Endl;
   Log() << "each evaluation scales like the square of the number of training" << Endl;
   Log() << "events, so that a coarse preliminary tuning should be performed on" << Endl;
   Log() << "reduced data sets." << Endl;
}