// @(#)root/tmva $Id$
// Author: Marcin Wolter, Andrzej Zemla

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodSVM                                                             *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation                                                            *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Marcin Wolter  <Marcin.Wolter@cern.ch> - IFJ PAN, Krakow, Poland          *
 *      Andrzej Zemla  <azemla@cern.ch>        - IFJ PAN, Krakow, Poland          *
 *      (IFJ PAN: Henryk Niewodniczanski Inst. Nucl. Physics, Krakow, Poland)     *
 *                                                                                *
 * Introduction of regression by:                                                 *
 *      Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Krakow, Poland  *
 *      Kamil Kraszewski      <kalq@cern.ch>     - IFJ PAN & UJ, Krakow, Poland   *
 *      Maciej Kruk           <mkruk@cern.ch>    - IFJ PAN & AGH, Krakow, Poland  *
 *                                                                                *
 * Introduction of kernel parameter optimisation                                  *
 * and additional kernel functions by:                                            *
 *      Adrian Bevan  <adrian.bevan@cern.ch>           - Queen Mary               *
 *                                                       University of London, UK *
 *      Tom Stevenson <thomas.james.stevenson@cern.ch> - Queen Mary               *
 *                                                       University of London, UK *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         *
 *      MPI-K Heidelberg, Germany                                                 *
 *      PAN, Krakow, Poland                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

//_______________________________________________________________________
//
// SMO Platt's SVM classifier with Keerthi & Shevade improvements
//_______________________________________________________________________

#include "TMVA/MethodSVM.h"

#ifndef ROOT_TMVA_Tools
#include "TMVA/Tools.h"
#endif
#ifndef ROOT_TMVA_Timer
#include "TMVA/Timer.h"
#endif

#ifndef ROOT_TMVA_SVWorkingSet
#include "TMVA/SVWorkingSet.h"
#endif

#ifndef ROOT_TMVA_SVEvent
#include "TMVA/SVEvent.h"
#endif

#ifndef ROOT_TMVA_SVKernelFunction
#include "TMVA/SVKernelFunction.h"
#endif

#include "TMVA/ClassifierFactory.h"
#include "TMVA/Configurable.h"
#include "TMVA/DataSet.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/Event.h"
#include "TMVA/IMethod.h"
#include "TMVA/MethodBase.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/Types.h"
#include "TMVA/Interval.h"
#include "TMVA/OptimizeConfigParameters.h"
#include "TMVA/Results.h"
#include "TMVA/ResultsClassification.h"
#include "TMVA/VariableInfo.h"

#include "Riostream.h"
#include "TFile.h"
#include "TVectorD.h"
#include "TMath.h"

#include <sstream>
#include <string>

using std::vector;
using std::string;
using std::stringstream;

//const Int_t basketsize__ = 1280000;
REGISTER_METHOD(SVM)

ClassImp(TMVA::MethodSVM)

////////////////////////////////////////////////////////////////////////////////
/// standard constructor

TMVA::MethodSVM::MethodSVM( const TString& jobName, const TString& methodTitle, DataSetInfo& theData,
                            const TString& theOption )
   : MethodBase( jobName, Types::kSVM, methodTitle, theData, theOption )
   , fCost(0)
   , fTolerance(0)
   , fMaxIter(0)
   , fNSubSets(0)
   , fBparm(0)
   , fGamma(0)
   , fWgSet(0)
   , fInputData(0)
   , fSupportVectors(0)
   , fSVKernelFunction(0)
   , fMinVars(0)
   , fMaxVars(0)
   , fDoubleSigmaSquared(0)
   , fOrder(0)
   , fTheta(0)
   , fKappa(0)
   , fMult(0)
   , fNumVars(0)
   , fGammas("")
   , fGammaList("")
   , fDataSize(0)
   , fLoss(0)
{
   fVarNames.clear();
   fNumVars = theData.GetVariableInfos().size();
   for (int i = 0; i < fNumVars; i++) {
      fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
   }
}

////////////////////////////////////////////////////////////////////////////////
/// constructor from weight file

TMVA::MethodSVM::MethodSVM( DataSetInfo& theData, const TString& theWeightFile )
   : MethodBase( Types::kSVM, theData, theWeightFile )
   , fCost(0)
   , fTolerance(0)
   , fMaxIter(0)
   , fNSubSets(0)
   , fBparm(0)
   , fGamma(0)
   , fWgSet(0)
   , fInputData(0)
   , fSupportVectors(0)
   , fSVKernelFunction(0)
   , fMinVars(0)
   , fMaxVars(0)
   , fDoubleSigmaSquared(0)
   , fOrder(0)
   , fTheta(0)
   , fKappa(0)
   , fMult(0)
   , fNumVars(0)
   , fGammas("")
   , fGammaList("")
   , fDataSize(0)
   , fLoss(0)
{
   fVarNames.clear();
   fNumVars = theData.GetVariableInfos().size();
   for (int i = 0; i < fNumVars; i++) {
      fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
   }
}

////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::MethodSVM::~MethodSVM( void )
{
   fSupportVectors->clear();
   for (UInt_t i = 0; i < fInputData->size(); i++) {
      delete fInputData->at(i);
   }
   if (fWgSet != 0)            { delete fWgSet; fWgSet = 0; }
   if (fSVKernelFunction != 0) { delete fSVKernelFunction; fSVKernelFunction = 0; }
}

////////////////////////////////////////////////////////////////////////////////
/// reset the method, as if it had just been instantiated (forget all training etc.)

void TMVA::MethodSVM::Reset( void )
{
   fSupportVectors->clear();
   for (UInt_t i = 0; i < fInputData->size(); i++) {
      delete fInputData->at(i);
      fInputData->at(i) = 0;
   }
   fInputData->clear();
   if (fWgSet != 0)            { fWgSet = 0; }
   if (fSVKernelFunction != 0) { fSVKernelFunction = 0; }
   if (Data()) {
      Data()->DeleteResults(GetMethodName(), Types::kTraining, GetAnalysisType());
   }

   Log() << kDEBUG << " successfully(?) reset the method " << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// SVM can handle classification with 2 classes and regression with one regression-target

Bool_t TMVA::MethodSVM::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets )
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kRegression    && numberTargets == 1) return kTRUE;
   return kFALSE;
}

////////////////////////////////////////////////////////////////////////////////
/// default initialisation

void TMVA::MethodSVM::Init( void )
{
   // SVM always uses normalised input variables
   SetNormalised( kTRUE );

   // Helge: do not book an event vector of given size but rather fill the vector
   // later with push_back. Anyway, this is NOT what is time consuming in
   // SVM, and it allows us to skip entirely events with weight == 0 ;)
   fInputData = new std::vector<TMVA::SVEvent*>(0);
   fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
}

////////////////////////////////////////////////////////////////////////////////
/// declare options available for this method

void TMVA::MethodSVM::DeclareOptions()
{
   DeclareOptionRef( fTheKernel = "RBF", "Kernel", "Pick which kernel ( RBF or MultiGauss )");
   // for gaussian kernel parameter(s)
   DeclareOptionRef( fGamma = 1., "Gamma", "RBF kernel parameter: Gamma (size of the Kernel)");
   // for polynomial kernel parameter(s)
   DeclareOptionRef( fOrder = 3, "Order", "Polynomial Kernel parameter: polynomial order");
   DeclareOptionRef( fTheta = 1., "Theta", "Polynomial Kernel parameter: polynomial theta");
   // for multi-gaussian kernel parameter(s)
   DeclareOptionRef( fGammas = "", "GammaList", "MultiGauss parameters" );

   // for range and step number for kernel parameter optimisation
   DeclareOptionRef( fTune = "All", "Tune", "Tune Parameters");
   // for list of kernels to be used with product or sum kernel
   DeclareOptionRef( fMultiKernels = "None", "KernelList", "Sum or product of kernels");
   DeclareOptionRef( fLoss = "hinge", "Loss", "Loss function");

   DeclareOptionRef( fCost, "C", "Cost parameter" );
   if (DoRegression()) {
      fCost = 0.002;
   } else {
      fCost = 1.;
   }
   DeclareOptionRef( fTolerance = 0.01, "Tol", "Tolerance parameter" );  //should be fixed
   DeclareOptionRef( fMaxIter = 1000, "MaxIter", "Maximum number of training loops" );
}
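
// Example booking strings built from the options declared above (a sketch:
// "factory" and "dataloader" stand for the user's TMVA::Factory and
// TMVA::DataLoader objects, which are not part of this class):
//
//    factory->BookMethod( dataloader, TMVA::Types::kSVM, "SVM",
//                         "Kernel=RBF:Gamma=0.25:C=1.0:Tol=0.001:MaxIter=1000" );
//
// A multi-gaussian kernel with one width per input variable would instead use
// e.g. "Kernel=MultiGauss:GammaList=0.1,0.2,0.3", and composite kernels use
// e.g. "Kernel=Prod:KernelList=RBF*Polynomial" (cf. MakeKernelList below).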

////////////////////////////////////////////////////////////////////////////////
/// options that are used ONLY for the READER to ensure backward compatibility

void TMVA::MethodSVM::DeclareCompatibilityOptions()
{
   MethodBase::DeclareCompatibilityOptions();
   DeclareOptionRef( fNSubSets = 1, "NSubSets", "Number of training subsets" );
   DeclareOptionRef( fTheKernel = "Gauss", "Kernel", "Uses kernel function");
   // for gaussian kernel parameter(s)
   DeclareOptionRef( fDoubleSigmaSquared = 2., "Sigma", "Kernel parameter: sigma");
   // for polynomial kernel parameter(s)
   DeclareOptionRef( fOrder = 3, "Order", "Polynomial Kernel parameter: polynomial order");
   // for sigmoid kernel parameters
   DeclareOptionRef( fTheta = 1., "Theta", "Sigmoid Kernel parameter: theta");
   DeclareOptionRef( fKappa = 1., "Kappa", "Sigmoid Kernel parameter: kappa");
}

////////////////////////////////////////////////////////////////////////////////
/// option post processing (if necessary)

void TMVA::MethodSVM::ProcessOptions()
{
   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
            << GetMethodTypeName()
            << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
            << Endl;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Train SVM

void TMVA::MethodSVM::Train()
{
   fIPyMaxIter = fMaxIter;
   Data()->SetCurrentType(Types::kTraining);

   Log() << kDEBUG << "Create event vector" << Endl;

   fDataSize = Data()->GetNEvents();
   Int_t nSignal     = Data()->GetNEvtSigTrain();
   Int_t nBackground = Data()->GetNEvtBkgdTrain();
   Double_t CSig;
   Double_t CBkg;

   // Use number of signal and background from above to weight the cost parameter
   // so that the training is not biased towards the larger dataset when the signal
   // and background samples are significantly different sizes.
   if (nSignal < nBackground) {
      CSig = fCost;
      CBkg = CSig*((double)nSignal/nBackground);
   }
   else {
      CBkg = fCost;
      CSig = CBkg*((double)nBackground/nSignal);
   }
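
   // Worked example of the weighting above: with fCost = 1, nSignal = 1000 and
   // nBackground = 4000, signal events keep CSig = 1 while the four-times larger
   // background sample is down-weighted to CBkg = 1*(1000/4000) = 0.25.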

   // Loop over events and assign the correct cost parameter.
   for (Int_t ievnt = 0; ievnt < Data()->GetNEvents(); ievnt++) {
      if (GetEvent(ievnt)->GetWeight() != 0) {
         if (DataInfo().IsSignal(GetEvent(ievnt))) {
            fInputData->push_back(new SVEvent(GetEvent(ievnt), CSig, DataInfo().IsSignal(GetEvent(ievnt))));
         }
         else {
            fInputData->push_back(new SVEvent(GetEvent(ievnt), CBkg, DataInfo().IsSignal(GetEvent(ievnt))));
         }
      }
   }

   // Set the correct kernel function.
   // Here we only use valid Mercer kernels. In the literature some people have reported reasonable
   // results using the Sigmoid kernel function; however, that is not a valid Mercer kernel and is not used here.
   if (fTheKernel == "RBF") {
      fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kRBF, fGamma );
   }
   else if (fTheKernel == "MultiGauss") {
      if (fGammas != "") {
         SetMGamma(fGammas);
         fGammaList = fGammas;
      }
      else {
         if (fmGamma.size() != 0) { GetMGamma(fmGamma); }  // Set fGammas if empty to write to XML file
         else {
            for (Int_t ngammas = 0; ngammas < fNumVars; ++ngammas) {
               fmGamma.push_back(1.0);
            }
            GetMGamma(fmGamma);
         }
      }
      fSVKernelFunction = new SVKernelFunction(fmGamma);
   }
   else if (fTheKernel == "Polynomial") {
      fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kPolynomial, fOrder, fTheta );
   }
   else if (fTheKernel == "Prod") {
      if (fGammas != "") {
         SetMGamma(fGammas);
         fGammaList = fGammas;
      }
      else {
         if (fmGamma.size() != 0) { GetMGamma(fmGamma); }  // Set fGammas if empty to write to XML file
      }
      fSVKernelFunction = new SVKernelFunction( fmGamma, MakeKernelList(fMultiKernels, fTheKernel), fOrder, fTheta );
   }
   else if (fTheKernel == "Sum") {
      if (fGammas != "") {
         SetMGamma(fGammas);
         fGammaList = fGammas;
      }
      else {
         if (fmGamma.size() != 0) { GetMGamma(fmGamma); }  // Set fGammas if empty to write to XML file
      }
      fSVKernelFunction = new SVKernelFunction( fmGamma, MakeKernelList(fMultiKernels, fTheKernel), fOrder, fTheta );
   }
   else {
      Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
      exit(1);
   }

   Log() << kINFO << "Building SVM Working Set...with " << fInputData->size() << " event instances" << Endl;
   Timer bldwstime( GetName() );
   fWgSet = new SVWorkingSet( fInputData, fSVKernelFunction, fTolerance, DoRegression() );
   Log() << kINFO << "Elapsed time for Working Set build: " << bldwstime.GetElapsedTime() << Endl;

   // timing
   Timer timer( GetName() );
   Log() << kINFO << "Sorry, no computing time forecast available for SVM, please wait ..." << Endl;

   if (fInteractive) fWgSet->SetIPythonInteractive(&fExitFromTraining, &fIPyCurrentIter);

   fWgSet->Train(fMaxIter);

   Log() << kINFO << "Elapsed time: " << timer.GetElapsedTime()
         << " " << Endl;

   fBparm          = fWgSet->GetBpar();
   fSupportVectors = fWgSet->GetSupportVectors();
   delete fWgSet;
   fWgSet = 0;

   if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
   ExitFromTraining();
}

////////////////////////////////////////////////////////////////////////////////
/// write configuration to xml file
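/// The resulting layout is, schematically (attribute values elided; the exact
/// serialisation of each TVectorD is handled by gTools().WriteTVectorDToXML):
///
///     <Weights fBparm="..." fGamma="..." fGammaList="..." fTheta="..." fOrder="..." NSupVec="N">
///       <SupportVector> ns typeFlag alpha alpha_p x_0 ... x_{Nvar-1} </SupportVector>   (N entries)
///       <Maxima Var0="..." Var1="..." ... />
///       <Minima Var0="..." Var1="..." ... />
///     </Weights>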
void TMVA::MethodSVM::AddWeightsXMLTo( void* parent ) const
{
   void* wght = gTools().AddChild(parent, "Weights");
   gTools().AddAttr(wght, "fBparm",     fBparm);
   gTools().AddAttr(wght, "fGamma",     fGamma);
   gTools().AddAttr(wght, "fGammaList", fGammaList);
   gTools().AddAttr(wght, "fTheta",     fTheta);
   gTools().AddAttr(wght, "fOrder",     fOrder);
   gTools().AddAttr(wght, "NSupVec",    fSupportVectors->size());

   for (std::vector<TMVA::SVEvent*>::iterator veciter = fSupportVectors->begin();
        veciter != fSupportVectors->end(); ++veciter) {
      TVectorD temp(GetNvar()+4);
      temp[0] = (*veciter)->GetNs();
      temp[1] = (*veciter)->GetTypeFlag();
      temp[2] = (*veciter)->GetAlpha();
      temp[3] = (*veciter)->GetAlpha_p();
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
         temp[ivar+4] = (*(*veciter)->GetDataVector())[ivar];
      gTools().WriteTVectorDToXML(wght, "SupportVector", &temp);
   }
   // write max/min data values
   void* maxnode = gTools().AddChild(wght, "Maxima");
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().AddAttr(maxnode, "Var"+gTools().StringFromInt(ivar), GetXmax(ivar));
   void* minnode = gTools().AddChild(wght, "Minima");
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().AddAttr(minnode, "Var"+gTools().StringFromInt(ivar), GetXmin(ivar));
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodSVM::ReadWeightsFromXML( void* wghtnode )
{
   gTools().ReadAttr( wghtnode, "fBparm",     fBparm     );
   gTools().ReadAttr( wghtnode, "fGamma",     fGamma     );
   gTools().ReadAttr( wghtnode, "fGammaList", fGammaList );
   gTools().ReadAttr( wghtnode, "fOrder",     fOrder     );
   gTools().ReadAttr( wghtnode, "fTheta",     fTheta     );
   UInt_t fNsupv = 0;
   gTools().ReadAttr( wghtnode, "NSupVec",    fNsupv     );

   Float_t alpha   = 0.;
   Float_t alpha_p = 0.;

   Int_t typeFlag = -1;
   // UInt_t ns = 0;
   std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());

   if (fMaxVars != 0) delete fMaxVars;
   fMaxVars = new TVectorD( GetNvar() );
   if (fMinVars != 0) delete fMinVars;
   fMinVars = new TVectorD( GetNvar() );
   if (fSupportVectors != 0) {
      for (vector<SVEvent*>::iterator it = fSupportVectors->begin(); it != fSupportVectors->end(); ++it)
         delete *it;
      delete fSupportVectors;
   }
   fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
   void* supportvectornode = gTools().GetChild(wghtnode);
   for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
      TVectorD temp(GetNvar()+4);
      gTools().ReadTVectorDFromXML(supportvectornode, "SupportVector", &temp);
      // ns = (UInt_t)temp[0];
      typeFlag = (int)temp[1];
      alpha    = temp[2];
      alpha_p  = temp[3];
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) (*svector)[ivar] = temp[ivar+4];

      fSupportVectors->push_back(new SVEvent(svector, alpha, alpha_p, typeFlag));
      supportvectornode = gTools().GetNextChild(supportvectornode);
   }

   void* maxminnode = supportvectornode;
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().ReadAttr( maxminnode, "Var"+gTools().StringFromInt(ivar), (*fMaxVars)[ivar] );
   maxminnode = gTools().GetNextChild(maxminnode);
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().ReadAttr( maxminnode, "Var"+gTools().StringFromInt(ivar), (*fMinVars)[ivar] );
   if (fSVKernelFunction != 0) delete fSVKernelFunction;
   if (fTheKernel == "RBF") {
      fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kRBF, fGamma );
   }
   else if (fTheKernel == "MultiGauss") {
      SetMGamma(fGammaList);
      fSVKernelFunction = new SVKernelFunction(fmGamma);
   }
   else if (fTheKernel == "Polynomial") {
      fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kPolynomial, fOrder, fTheta );
   }
   else if (fTheKernel == "Prod") {
      SetMGamma(fGammaList);
      fSVKernelFunction = new SVKernelFunction( fmGamma, MakeKernelList(fMultiKernels, fTheKernel), fOrder, fTheta );
   }
   else if (fTheKernel == "Sum") {
      SetMGamma(fGammaList);
      fSVKernelFunction = new SVKernelFunction( fmGamma, MakeKernelList(fMultiKernels, fTheKernel), fOrder, fTheta );
   }
   else {
      Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
      exit(1);
   }
   delete svector;
}

////////////////////////////////////////////////////////////////////////////////
/// TODO write IT
/// write training sample (TTree) to file

void TMVA::MethodSVM::WriteWeightsToStream( TFile& /* fout */ ) const
{
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodSVM::ReadWeightsFromStream( std::istream& istr )
{
   if (fSupportVectors != 0) { delete fSupportVectors; fSupportVectors = 0; }
   fSupportVectors = new std::vector<TMVA::SVEvent*>(0);

   // read configuration from input stream
   istr >> fBparm;

   UInt_t fNsupv;
   // coverity[tainted_data_argument]
   istr >> fNsupv;
   fSupportVectors->reserve(fNsupv);

   Float_t typeTalpha = 0.;
   Float_t alpha = 0.;
   Int_t typeFlag = -1;
   UInt_t ns = 0;
   std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());

   fMaxVars = new TVectorD( GetNvar() );
   fMinVars = new TVectorD( GetNvar() );

   for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
      istr >> ns;
      istr >> typeTalpha;
      typeFlag = typeTalpha < 0 ? -1 : 1;
      alpha    = typeTalpha < 0 ? -typeTalpha : typeTalpha;
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> svector->at(ivar);

      fSupportVectors->push_back(new SVEvent(svector, alpha, typeFlag, ns));
   }

   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMaxVars)[ivar];

   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMinVars)[ivar];

   delete fSVKernelFunction;
   if (fTheKernel == "Gauss") {
      fSVKernelFunction = new SVKernelFunction( 1/fDoubleSigmaSquared );
   }
   else {
      SVKernelFunction::EKernelType k = SVKernelFunction::kLinear;
      if      (fTheKernel == "Linear")     k = SVKernelFunction::kLinear;
      else if (fTheKernel == "Polynomial") k = SVKernelFunction::kPolynomial;
      else if (fTheKernel == "Sigmoid")    k = SVKernelFunction::kSigmoidal;
      else {
         Log() << kFATAL << "Unknown kernel function found in weight file!" << Endl;
      }
      fSVKernelFunction = new SVKernelFunction();
      fSVKernelFunction->setCompatibilityParams(k, fOrder, fTheta, fKappa);
   }
   delete svector;
}

////////////////////////////////////////////////////////////////////////////////
/// TODO write IT

void TMVA::MethodSVM::ReadWeightsFromStream( TFile& /* fFin */ )
{
}

////////////////////////////////////////////////////////////////////////////////
/// returns MVA value for given event
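/// The raw SVM response evaluated below is
///
///     d(x) = sum_i alpha_i * t_i * K(x_i, x) - fBparm
///
/// with support-vector weights alpha_i, type flags t_i = +-1 and kernel K;
/// the returned value is the sigmoid mapping 1/(1 + exp(d(x))), so that
/// signal-like events come out close to 1 (see the sign-convention note below).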
Double_t TMVA::MethodSVM::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   Double_t myMVA = 0;

   // TODO: avoid creation of a new SVEvent every time (Joerg)
   SVEvent* ev = new SVEvent( GetEvent(), 0. ); // check for specificators

   for (UInt_t ievt = 0; ievt < fSupportVectors->size(); ievt++) {
      myMVA += ( fSupportVectors->at(ievt)->GetAlpha()
                 * fSupportVectors->at(ievt)->GetTypeFlag()
                 * fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
   }

   delete ev;

   myMVA -= fBparm;

   // cannot determine error
   NoErrorCalc(err, errUpper);

   // 08/12/09: changed sign here to make results agree with convention signal=1
   return 1.0/(1.0 + TMath::Exp(myMVA));
}

////////////////////////////////////////////////////////////////////////////////

const std::vector<Float_t>& TMVA::MethodSVM::GetRegressionValues()
{
   if (fRegressionReturnVal == NULL)
      fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();

   Double_t myMVA = 0;

   const Event* baseev = GetEvent();
   SVEvent* ev = new SVEvent( baseev, 0. ); // check for specificators

   for (UInt_t ievt = 0; ievt < fSupportVectors->size(); ievt++) {
      myMVA += ( fSupportVectors->at(ievt)->GetDeltaAlpha()
                 * fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
   }
   myMVA += fBparm;
   Event* evT = new Event(*baseev);
   evT->SetTarget(0, myMVA);

   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );

   fRegressionReturnVal->push_back(evT2->GetTarget(0));

   delete evT;
   delete ev;

   return *fRegressionReturnVal;
}

////////////////////////////////////////////////////////////////////////////////
/// write specific classifier response

void TMVA::MethodSVM::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   const int fNsupv = fSupportVectors->size();
   fout << "   // not implemented for class: \"" << className << "\"" << std::endl;
   fout << "   float        fBparameter;" << std::endl;
   fout << "   int          fNOfSuppVec;" << std::endl;
   fout << "   static float fAllSuppVectors[][" << fNsupv << "];" << std::endl;
   fout << "   static float fAlphaTypeCoef[" << fNsupv << "];" << std::endl;
   fout << std::endl;
   fout << "   // Kernel parameter(s) " << std::endl;
   fout << "   float fGamma;" << std::endl;
   fout << "};" << std::endl;
   fout << "" << std::endl;

   // Initialize function definition
   fout << "inline void " << className << "::Initialize() " << std::endl;
   fout << "{" << std::endl;
   fout << "   fBparameter = " << fBparm << ";" << std::endl;
   fout << "   fNOfSuppVec = " << fNsupv << ";" << std::endl;
   fout << "   fGamma = " << fGamma << ";" << std::endl;
   fout << "}" << std::endl;
   fout << std::endl;

   // GetMvaValue__ function definition
   fout << "inline double " << className << "::GetMvaValue__(const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   double mvaval = 0; " << std::endl;
   fout << "   double temp = 0; " << std::endl;
   fout << std::endl;
   fout << "   for (int ievt = 0; ievt < fNOfSuppVec; ievt++ ){" << std::endl;
   fout << "      temp = 0;" << std::endl;
   fout << "      for ( unsigned int ivar = 0; ivar < GetNvar(); ivar++ ) {" << std::endl;
   fout << "         temp += (fAllSuppVectors[ivar][ievt] - inputValues[ivar])  " << std::endl;
   fout << "                 * (fAllSuppVectors[ivar][ievt] - inputValues[ivar]); " << std::endl;
   fout << "      }" << std::endl;
   fout << "      mvaval += fAlphaTypeCoef[ievt] * exp( -fGamma * temp ); " << std::endl;
   fout << "   }" << std::endl;
   fout << "   mvaval -= fBparameter;" << std::endl;
   fout << "   return 1./(1. + exp(mvaval));" << std::endl;
   fout << "}" << std::endl;
   fout << "// Clean up" << std::endl;
   fout << "inline void " << className << "::Clear() " << std::endl;
   fout << "{" << std::endl;
   fout << "   // nothing to clear " << std::endl;
   fout << "}" << std::endl;
   fout << "" << std::endl;

   // define support vectors
   fout << "float " << className << "::fAlphaTypeCoef[] =" << std::endl;
   fout << "{ ";
   for (Int_t isv = 0; isv < fNsupv; isv++) {
      fout << fSupportVectors->at(isv)->GetDeltaAlpha() * fSupportVectors->at(isv)->GetTypeFlag();
      if (isv < fNsupv-1) fout << ", ";
   }
   fout << " };" << std::endl << std::endl;

   fout << "float " << className << "::fAllSuppVectors[][" << fNsupv << "] =" << std::endl;
   fout << "{";
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
      fout << std::endl;
      fout << "   { ";
      for (Int_t isv = 0; isv < fNsupv; isv++) {
         fout << fSupportVectors->at(isv)->GetDataVector()->at(ivar);
         if (isv < fNsupv-1) fout << ", ";
      }
      fout << " }";
      if (ivar < GetNvar()-1) fout << ", " << std::endl;
      else                    fout << std::endl;
   }
   fout << "};" << std::endl << std::endl;
}

////////////////////////////////////////////////////////////////////////////////
/// get help message text
///
/// typical length of text line:
///         "|--------------------------------------------------------------|"

void TMVA::MethodSVM::GetHelpMessage() const
{
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "The Support Vector Machine (SVM) builds a hyperplane separating" << Endl;
   Log() << "signal and background events (vectors) using the minimal subset of " << Endl;
   Log() << "all vectors used for training (support vectors). The extension to" << Endl;
   Log() << "the non-linear case is performed by mapping input vectors into a " << Endl;
   Log() << "higher-dimensional feature space in which linear separation is " << Endl;
   Log() << "possible. The use of the kernel functions thereby eliminates the " << Endl;
   Log() << "explicit transformation to the feature space. The implemented SVM " << Endl;
   Log() << "algorithm performs the classification tasks using linear, polynomial, " << Endl;
   Log() << "Gaussian and sigmoidal kernel functions. The Gaussian kernel allows " << Endl;
   Log() << "one to apply any discriminant shape in the input space." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "SVM is a general purpose non-linear classification method, which " << Endl;
   Log() << "does not require data preprocessing like decorrelation or Principal " << Endl;
   Log() << "Component Analysis. It generalises quite well and can handle analyses " << Endl;
   Log() << "with large numbers of input variables." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "Optimal performance requires primarily a proper choice of the kernel " << Endl;
   Log() << "parameters (the width \"Sigma\" in case of Gaussian kernel) and the" << Endl;
   Log() << "cost parameter \"C\". The user must optimise them empirically by running" << Endl;
   Log() << "SVM several times with different parameter sets. The time needed for " << Endl;
   Log() << "each evaluation scales like the square of the number of training " << Endl;
   Log() << "events so that a coarse preliminary tuning should be performed on " << Endl;
   Log() << "reduced data sets." << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// Optimize Tuning Parameters
/// This is used to optimise the kernel function parameters and cost. All kernel
/// parameters are optimised by default with default ranges; however, the
/// parameters to be optimised can be set when booking the method with the
/// option Tune.
/// Example: "Tune=Gamma[0.01;1.0;100]" would only tune the RBF Gamma between
/// 0.01 and 1.0 in 100 steps.
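///
/// A typical invocation is a sketch along these lines (assuming the standard
/// TMVA workflow; the factory call forwards to this function for each booked
/// method, and "method" stands for a pointer to this MethodSVM instance):
///
///     factory->OptimizeAllMethods("ROCIntegral", "FitGA");
///     // or directly:  method->OptimizeTuningParameters();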
std::map<TString,Double_t> TMVA::MethodSVM::OptimizeTuningParameters(TString fomType, TString fitType)
{
   // Call the Optimizer with the set of kernel parameters and ranges that are meant to be tuned.
   std::map< TString,std::vector<Double_t> > optVars;
   // Get parameters and options specified in booking of method.
   if (fTune != "All") {
      optVars = GetTuningOptions();
   }
   std::map< TString,std::vector<Double_t> >::iterator iter;
   // Fill all the tuning parameters that should be optimized into a map
   std::map<TString,TMVA::Interval*> tuneParameters;
   std::map<TString,Double_t> tunedParameters;
   // Note: the 3rd parameter in the interval is the "number of bins", NOT the stepsize!!
   // The actual values are always read from the middle of the bins.
   Log() << kINFO << "Using the " << fTheKernel << " kernel." << Endl;
   // Setup map of parameters based on the specified options or defaults.
   if (fTheKernel == "RBF") {
      if (fTune == "All") {
         tuneParameters.insert(std::pair<TString,Interval*>("Gamma", new Interval(0.01,1.,100)));
         tuneParameters.insert(std::pair<TString,Interval*>("C",     new Interval(0.01,1.,100)));
      }
      else {
         for (iter = optVars.begin(); iter != optVars.end(); iter++) {
            if (iter->first == "Gamma" || iter->first == "C") {
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
            }
            else {
               Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
               exit(1);
            }
         }
      }
   }
   else if (fTheKernel == "Polynomial") {
      if (fTune == "All") {
         tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
         tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.01,1.,100)));
         tuneParameters.insert(std::pair<TString,Interval*>("C",     new Interval(0.01,1.,100)));
      }
      else {
         for (iter = optVars.begin(); iter != optVars.end(); iter++) {
            if (iter->first == "Theta" || iter->first == "C") {
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
            }
            else if (iter->first == "Order") {
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
            }
            else {
               Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
               exit(1);
            }
         }
      }
   }
   else if (fTheKernel == "MultiGauss") {
      if (fTune == "All") {
         for (int i = 0; i < fNumVars; i++) {
            stringstream s;
            s << fVarNames.at(i);
            string str = "Gamma_" + s.str();
            tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(0.01,1.,100)));
         }
         tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
      } else {
         for (iter = optVars.begin(); iter != optVars.end(); iter++) {
            if (iter->first == "GammaList") {
               for (int j = 0; j < fNumVars; j++) {
                  stringstream s;
                  s << fVarNames.at(j);
                  string str = "Gamma_" + s.str();
                  tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
               }
            }
            else if (iter->first == "C") {
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
            }
            else {
               Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
               exit(1);
            }
         }
      }
   }
   else if (fTheKernel == "Prod") {
      std::stringstream tempstring(fMultiKernels);
      std::string value;
      while (std::getline(tempstring, value, '*')) {
         if (value == "RBF") {
            tuneParameters.insert(std::pair<TString,Interval*>("Gamma", new Interval(0.01,1.,100)));
         }
         else if (value == "MultiGauss") {
            for (int i = 0; i < fNumVars; i++) {
               stringstream s;
               s << fVarNames.at(i);
               string str = "Gamma_" + s.str();
               tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(0.01,1.,100)));
            }
         }
         else if (value == "Polynomial") {
            tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
            tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.0,1.0,101)));
         }
         else {
            Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
            exit(1);
         }
      }
      tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
   }
   else if (fTheKernel == "Sum") {
      std::stringstream tempstring(fMultiKernels);
      std::string value;
      while (std::getline(tempstring, value, '+')) {
         if (value == "RBF") {
            tuneParameters.insert(std::pair<TString,Interval*>("Gamma", new Interval(0.01,1.,100)));
         }
         else if (value == "MultiGauss") {
            for (int i = 0; i < fNumVars; i++) {
               stringstream s;
               s << fVarNames.at(i);
               string str = "Gamma_" + s.str();
               tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(0.01,1.,100)));
            }
         }
         else if (value == "Polynomial") {
            tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
            tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.0,1.0,101)));
         }
         else {
            Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
            exit(1);
         }
      }
      tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
   }
   else {
      Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
      exit(1);
   }
   Log() << kINFO << " the following SVM parameters will be tuned on the respective *grid*\n" << Endl;
   std::map<TString,TMVA::Interval*>::iterator it;
   for (it = tuneParameters.begin(); it != tuneParameters.end(); it++) {
      Log() << kWARNING << it->first << Endl;
      std::ostringstream oss;
      (it->second)->Print(oss);
      Log() << oss.str();
      Log() << Endl;
   }
   OptimizeConfigParameters optimize(this, tuneParameters, fomType, fitType);
   tunedParameters = optimize.optimize();

   return tunedParameters;
}

////////////////////////////////////////////////////////////////////////////////
/// Set the tuning parameters according to the argument
void TMVA::MethodSVM::SetTuneParameters(std::map<TString,Double_t> tuneParameters)
{
   std::map<TString,Double_t>::iterator it;
   if (fTheKernel == "RBF") {
      for (it = tuneParameters.begin(); it != tuneParameters.end(); it++) {
         Log() << kWARNING << it->first << " = " << it->second << Endl;
         if (it->first == "Gamma") {
            SetGamma(it->second);
         }
         else if (it->first == "C") {
            SetCost(it->second);
         }
         else {
            Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
         }
      }
   }
   else if (fTheKernel == "MultiGauss") {
      fmGamma.clear();
      for (int i = 0; i < fNumVars; i++) {
         stringstream s;
         s << fVarNames.at(i);
         string str = "Gamma_" + s.str();
         Log() << kWARNING << tuneParameters.find(str)->first << " = " << tuneParameters.find(str)->second << Endl;
         fmGamma.push_back(tuneParameters.find(str)->second);
      }
      for (it = tuneParameters.begin(); it != tuneParameters.end(); it++) {
         if (it->first == "C") {
            Log() << kWARNING << it->first << " = " << it->second << Endl;
            SetCost(it->second);
            break;
         }
      }
   }
   else if (fTheKernel == "Polynomial") {
      for (it = tuneParameters.begin(); it != tuneParameters.end(); it++) {
         Log() << kWARNING << it->first << " = " << it->second << Endl;
         if (it->first == "Order") {
            SetOrder(it->second);
         }
         else if (it->first == "Theta") {
            SetTheta(it->second);
         }
         else if (it->first == "C") {
            SetCost(it->second);
         }
         else if (it->first == "Mult") {
            SetMult(it->second);
         }
         else {
            Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
         }
      }
   }
   else if (fTheKernel == "Prod" || fTheKernel == "Sum") {
      fmGamma.clear();
      for (it = tuneParameters.begin(); it != tuneParameters.end(); it++) {
         bool foundParam = false;
         Log() << kWARNING << it->first << " = " << it->second << Endl;
         for (int i = 0; i < fNumVars; i++) {
            stringstream s;
            s << fVarNames.at(i);
            string str = "Gamma_" + s.str();
            if (it->first == str) {
               fmGamma.push_back(it->second);
               foundParam = true;
            }
         }
         if (it->first == "Gamma") {
            SetGamma(it->second);
            foundParam = true;
         }
         else if (it->first == "Order") {
            SetOrder(it->second);
            foundParam = true;
         }
         else if (it->first == "Theta") {
            SetTheta(it->second);
            foundParam = true;
         }
         else if (it->first == "C") {
            SetCost(it->second);
            foundParam = true;
         }
         else {
            if (!foundParam) {
               Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
            }
         }
      }
   }
   else {
      Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
      exit(1);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Takes as input a string of values for multigaussian gammas and splits it, filling the
/// gamma vector required by the SVKernelFunction. Example: "GammaList=0.1,0.2,0.3" would
/// make a vector with Gammas of 0.1, 0.2 & 0.3 corresponding to input variables 1, 2 & 3
/// respectively.
void TMVA::MethodSVM::SetMGamma(std::string & mg)
{
   std::stringstream tempstring(mg);
   Float_t value;
   while (tempstring >> value) {
      fmGamma.push_back(value);

      if (tempstring.peek() == ',') {
         tempstring.ignore();
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Produces GammaList string for multigaussian kernel to be written to xml file
void TMVA::MethodSVM::GetMGamma(const std::vector<float> & gammas)
{
   std::ostringstream tempstring;
   for (UInt_t i = 0; i < gammas.size(); ++i) {
      tempstring << gammas.at(i);
      if (i != (gammas.size()-1)) {
         tempstring << ",";
      }
   }
   fGammaList = tempstring.str();
}

////////////////////////////////////////////////////////////////////////////////
/// MakeKernelList
/// Function providing string manipulation for product or sum of kernels functions
/// to take the list of kernels specified in the booking of the method and provide a vector
/// of SV kernels to iterate over in SVKernelFunction.
/// Example: "KernelList=RBF*Polynomial" would use a product of the RBF and Polynomial
/// kernels.
std::vector<TMVA::SVKernelFunction::EKernelType> TMVA::MethodSVM::MakeKernelList(std::string multiKernels, TString kernel)
{
   std::vector<TMVA::SVKernelFunction::EKernelType> kernelsList;
   std::stringstream tempstring(multiKernels);
   std::string value;
   if (kernel == "Prod") {
      while (std::getline(tempstring, value, '*')) {
         if (value == "RBF") { kernelsList.push_back(SVKernelFunction::kRBF); }
         else if (value == "MultiGauss") {
            kernelsList.push_back(SVKernelFunction::kMultiGauss);
            if (fGammas != "") {
               SetMGamma(fGammas);
            }
         }
         else if (value == "Polynomial") { kernelsList.push_back(SVKernelFunction::kPolynomial); }
         else {
            Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
            exit(1);
         }
      }
   }
   else if (kernel == "Sum") {
      while (std::getline(tempstring, value, '+')) {
         if (value == "RBF") { kernelsList.push_back(SVKernelFunction::kRBF); }
         else if (value == "MultiGauss") {
            kernelsList.push_back(SVKernelFunction::kMultiGauss);
            if (fGammas != "") {
               SetMGamma(fGammas);
            }
         }
         else if (value == "Polynomial") { kernelsList.push_back(SVKernelFunction::kPolynomial); }
         else {
            Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
            exit(1);
         }
      }
   }
   else {
      Log() << kWARNING << "Unable to split MultiKernels. Delimiters */+ required." << Endl;
      exit(1);
   }
   return kernelsList;
}

////////////////////////////////////////////////////////////////////////////////
/// GetTuningOptions
/// Function to allow for ranges and number of steps (for scan) when optimising kernel
/// function parameters. Specified when booking the method, after the parameter to be
/// optimised, between square brackets with the values separated by ';': the first value
/// is the lower limit, the second the upper limit and the third is the number of steps.
/// Example: "Tune=Gamma[0.01;1.0;100]" would only tune the RBF Gamma between 0.01 and
/// 1.0 in 100 steps.
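/// Several parameters may be listed, separated by commas, e.g. (illustrative
/// values) "Tune=Gamma[0.01;1.0;100],C[0.1;1.0;50]"; any field omitted from the
/// bracket is topped up with the parameter's default in the switch below.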
std::map< TString,std::vector<Double_t> > TMVA::MethodSVM::GetTuningOptions()
{
   std::map< TString,std::vector<Double_t> > optVars;
   std::stringstream tempstring(fTune);
   std::string value;
   while (std::getline(tempstring, value, ',')) {
      unsigned first = value.find('[') + 1;
      unsigned last  = value.find_last_of(']');
      std::string optParam = value.substr(0, first-1);
      std::stringstream strNew(value.substr(first, last-first));
      Double_t optInterval;
      std::vector<Double_t> tempVec;
      UInt_t i = 0;
      while (strNew >> optInterval) {
         tempVec.push_back(optInterval);
         if (strNew.peek() == ';') {
            strNew.ignore();
         }
         ++i;
      }
      if (i != 3 && i == tempVec.size()) {
         if (optParam == "C" || optParam == "Gamma" || optParam == "GammaList" || optParam == "Theta") {
            switch (i) {   // intentional fall-through: top up missing fields with defaults
            case 0:
               tempVec.push_back(0.01);
            case 1:
               tempVec.push_back(1.);
            case 2:
               tempVec.push_back(100);
            }
         }
         else if (optParam == "Order") {
            switch (i) {   // intentional fall-through, as above
            case 0:
               tempVec.push_back(1);
            case 1:
               tempVec.push_back(10);
            case 2:
               tempVec.push_back(10);
            }
         }
         else {
            Log() << kWARNING << optParam << " is not a recognised tuneable parameter." << Endl;
            exit(1);
         }
      }
      optVars.insert(std::pair<TString,std::vector<Double_t> >(optParam, tempVec));
   }
   return optVars;
}

////////////////////////////////////////////////////////////////////////////////
/// getLoss
/// Calculates the loss for the testing dataset. The loss function can be specified
/// when booking the method; otherwise it defaults to hinge loss. Currently not
/// used, but it is accessible if required.
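/// For an event of weight w and classifier response v, the contributions
/// accumulated below are
///
///     hinge:    w*(1-v)  (signal),            w*v  (background)
///     exp:      w*exp(-v)  (signal),          w*exp(-(1-v))  (background)
///     binomial: w*log(1+exp(-2v))  (signal),  w*log(1+exp(-2(1-v)))  (background)
///
/// and the function returns the weighted mean, i.e. the sum of these
/// contributions divided by the sum of event weights.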
Double_t TMVA::MethodSVM::getLoss( TString lossFunction )
{
   Double_t loss = 0.0;
   Double_t sumW = 0.0;
   Double_t temp = 0.0;
   Data()->SetCurrentType(Types::kTesting);
   Results* mvaRes = Data()->GetResults( GetMethodName(), Types::kTesting, Types::kClassification );
   for (Long64_t ievt = 0; ievt < GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      Float_t v = (*mvaRes)[ievt][0];
      Float_t w = ev->GetWeight();
      if (DataInfo().IsSignal(ev)) {
         if (lossFunction == "hinge") {
            temp += w*(1-v);
         }
         else if (lossFunction == "exp") {
            temp += w*TMath::Exp(-v);
         }
         else if (lossFunction == "binomial") {
            temp += w*TMath::Log(1+TMath::Exp(-2*v));
         }
         else {
            Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
            exit(1);
         }
      }
      else {
         if (lossFunction == "hinge") {
            temp += w*v;
         }
         else if (lossFunction == "exp") {
            temp += w*TMath::Exp(-(1-v));
         }
         else if (lossFunction == "binomial") {
            temp += w*TMath::Log(1+TMath::Exp(-2*(1-v)));
         }
         else {
            Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
            exit(1);
         }
      }
      sumW += w;
   }
   loss = temp/sumW;

   return loss;
}