// MethodSVM.cxx — source listing extracted from the ROOT Reference Guide
// (HTML viewer artifacts removed; embedded listing line numbers stripped below)
1// @(#)root/tmva $Id$
2// Author: Marcin Wolter, Andrzej Zemla
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodSVM *
8 * *
9 * *
10 * Description: *
11 * Implementation *
12 * *
13 * Authors (alphabetical): *
14 * Marcin Wolter <Marcin.Wolter@cern.ch> - IFJ PAN, Krakow, Poland *
15 * Andrzej Zemla <azemla@cern.ch> - IFJ PAN, Krakow, Poland *
16 * (IFJ PAN: Henryk Niewodniczanski Inst. Nucl. Physics, Krakow, Poland) *
17 * *
18 * Introduction of regression by: *
19 * Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Krakow, Poland *
20 * Kamil Kraszewski <kalq@cern.ch> - IFJ PAN & UJ, Krakow, Poland *
21 * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Krakow, Poland *
22 * *
23 * Introduction of kernel parameter optimisation *
24 * and additional kernel functions by: *
25 * Adrian Bevan <adrian.bevan@cern.ch> - Queen Mary *
26 * University of London, UK *
27 * Tom Stevenson <thomas.james.stevenson@cern.ch> - Queen Mary *
28 * University of London, UK *
29 * *
30 * Copyright (c) 2005: *
31 * CERN, Switzerland *
32 * MPI-K Heidelberg, Germany *
33 * PAN, Krakow, Poland *
34 * *
35 * Redistribution and use in source and binary forms, with or without *
36 * modification, are permitted according to the terms listed in LICENSE *
37 * (see tmva/doc/LICENSE) *
38 **********************************************************************************/
39
40/*! \class TMVA::MethodSVM
41\ingroup TMVA
42SMO Platt's SVM classifier with Keerthi & Shavade improvements
43*/
44
45#include "TMVA/MethodSVM.h"
46
47#include "TMVA/Tools.h"
48#include "TMVA/Timer.h"
49
50#include "TMVA/SVWorkingSet.h"
51
52#include "TMVA/SVEvent.h"
53
55
57#include "TMVA/Configurable.h"
58#include "TMVA/DataSet.h"
59#include "TMVA/DataSetInfo.h"
60#include "TMVA/Event.h"
61#include "TMVA/IMethod.h"
62#include "TMVA/MethodBase.h"
63#include "TMVA/MsgLogger.h"
64#include "TMVA/Types.h"
65#include "TMVA/Interval.h"
67#include "TMVA/Results.h"
69#include "TMVA/VariableInfo.h"
70
71#include "TFile.h"
72#include "TVectorD.h"
73#include "TMath.h"
74
75#include <iostream>
76#include <string>
77
78using std::vector;
79using std::string;
80using std::stringstream;
81
82//const Int_t basketsize__ = 1280000;
84
85
86////////////////////////////////////////////////////////////////////////////////
87/// standard constructor
88
89 TMVA::MethodSVM::MethodSVM( const TString& jobName, const TString& methodTitle, DataSetInfo& theData,
90 const TString& theOption )
91 : MethodBase( jobName, Types::kSVM, methodTitle, theData, theOption)
92 , fCost(0)
93 , fTolerance(0)
94 , fMaxIter(0)
95 , fNSubSets(0)
96 , fBparm(0)
97 , fGamma(0)
98 , fWgSet(0)
99 , fInputData(0)
100 , fSupportVectors(0)
102 , fMinVars(0)
103 , fMaxVars(0)
105 , fOrder(0)
106 , fTheta(0)
107 , fKappa(0)
108 , fMult(0)
109 ,fNumVars(0)
110 , fGammas("")
111 , fGammaList("")
112 , fDataSize(0)
113 , fLoss(0)
114{
115 fVarNames.clear();
116 fNumVars = theData.GetVariableInfos().size();
117 for( int i=0; i<fNumVars; i++){
118 fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
119 }
120}
121
122////////////////////////////////////////////////////////////////////////////////
123/// constructor from weight file
124
125TMVA::MethodSVM::MethodSVM( DataSetInfo& theData, const TString& theWeightFile)
126 : MethodBase( Types::kSVM, theData, theWeightFile)
127 , fCost(0)
128 , fTolerance(0)
129 , fMaxIter(0)
130 , fNSubSets(0)
131 , fBparm(0)
132 , fGamma(0)
133 , fWgSet(0)
134 , fInputData(0)
135 , fSupportVectors(0)
137 , fMinVars(0)
138 , fMaxVars(0)
140 , fOrder(0)
141 , fTheta(0)
142 , fKappa(0)
143 , fMult(0)
144 , fNumVars(0)
145 , fGammas("")
146 , fGammaList("")
147 , fDataSize(0)
148 , fLoss(0)
149{
150 fVarNames.clear();
151 fNumVars = theData.GetVariableInfos().size();
152 for( int i=0;i<fNumVars; i++){
153 fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
154 }
155}
156
157////////////////////////////////////////////////////////////////////////////////
158/// destructor
159
161{
162 fSupportVectors->clear();
163 for (UInt_t i=0; i<fInputData->size(); i++) {
164 delete fInputData->at(i);
165 }
166 if (fWgSet !=0) { delete fWgSet; fWgSet=0; }
167 if (fSVKernelFunction !=0 ) { delete fSVKernelFunction; fSVKernelFunction = 0; }
168}
169
170////////////////////////////////////////////////////////////////////////////////
171// reset the method, as if it had just been instantiated (forget all training etc.)
172
174{
175 // reset the method, as if it had just been instantiated (forget all training etc.)
176 fSupportVectors->clear();
177 for (UInt_t i=0; i<fInputData->size(); i++){
178 delete fInputData->at(i);
179 fInputData->at(i)=0;
180 }
181 fInputData->clear();
182 if (fWgSet !=0) { fWgSet=0; }
183 if (fSVKernelFunction !=0 ) { fSVKernelFunction = 0; }
184 if (Data()){
185 Data()->DeleteResults(GetMethodName(), Types::kTraining, GetAnalysisType());
186 }
187
188 Log() << kDEBUG << " successfully(?) reset the method " << Endl;
189}
190
191////////////////////////////////////////////////////////////////////////////////
192/// SVM can handle classification with 2 classes and regression with one regression-target
193
195{
196 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
197 if (type == Types::kRegression && numberTargets == 1) return kTRUE;
198 return kFALSE;
199}
200
201////////////////////////////////////////////////////////////////////////////////
202/// default initialisation
203
205{
206 // SVM always uses normalised input variables
208
209 // Helge: do not book a event vector of given size but rather fill the vector
210 // later with pus_back. Anyway, this is NOT what is time consuming in
211 // SVM and it allows to skip totally events with weights == 0 ;)
212 fInputData = new std::vector<TMVA::SVEvent*>(0);
213 fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
214}
215
216////////////////////////////////////////////////////////////////////////////////
217/// declare options available for this method
218
220{
221 DeclareOptionRef( fTheKernel = "RBF", "Kernel", "Pick which kernel ( RBF or MultiGauss )");
222 // for gaussian kernel parameter(s)
223 DeclareOptionRef( fGamma = 1., "Gamma", "RBF kernel parameter: Gamma (size of the Kernel)");
224 // for polynomial kernel parameter(s)
225 DeclareOptionRef( fOrder = 3, "Order", "Polynomial Kernel parameter: polynomial order");
226 DeclareOptionRef( fTheta = 1., "Theta", "Polynomial Kernel parameter: polynomial theta");
227 // for multi-gaussian kernel parameter(s)
228 DeclareOptionRef( fGammas = "", "GammaList", "MultiGauss parameters" );
229
230 // for range and step number for kernel parameter optimisation
231 DeclareOptionRef( fTune = "All", "Tune", "Tune Parameters");
232 // for list of kernels to be used with product or sum kernel
233 DeclareOptionRef( fMultiKernels = "None", "KernelList", "Sum or product of kernels");
234 DeclareOptionRef( fLoss = "hinge", "Loss", "Loss function");
235
236 DeclareOptionRef( fCost, "C", "Cost parameter" );
237 if (DoRegression()) {
238 fCost = 0.002;
239 }else{
240 fCost = 1.;
241 }
242 DeclareOptionRef( fTolerance = 0.01, "Tol", "Tolerance parameter" ); //should be fixed
243 DeclareOptionRef( fMaxIter = 1000, "MaxIter", "Maximum number of training loops" );
244
245}
246
247////////////////////////////////////////////////////////////////////////////////
248/// options that are used ONLY for the READER to ensure backward compatibility
249
251{
253 DeclareOptionRef( fNSubSets = 1, "NSubSets", "Number of training subsets" );
254 DeclareOptionRef( fTheKernel = "Gauss", "Kernel", "Uses kernel function");
255 // for gaussian kernel parameter(s)
256 DeclareOptionRef( fDoubleSigmaSquared = 2., "Sigma", "Kernel parameter: sigma");
257 // for polynomial kernel parameter(s)
258 DeclareOptionRef( fOrder = 3, "Order", "Polynomial Kernel parameter: polynomial order");
259 // for sigmoid kernel parameters
260 DeclareOptionRef( fTheta = 1., "Theta", "Sigmoid Kernel parameter: theta");
261 DeclareOptionRef( fKappa = 1., "Kappa", "Sigmoid Kernel parameter: kappa");
262}
263
264////////////////////////////////////////////////////////////////////////////////
265/// option post processing (if necessary)
266
268{
270 Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
272 << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
273 << Endl;
274 }
275}
276
277////////////////////////////////////////////////////////////////////////////////
278/// Train SVM
279
// NOTE(review): the extraction of this listing dropped several physical lines
// inside this function: its signature (upstream: `void TMVA::MethodSVM::Train()`),
// the `fSVKernelFunction = new SVKernelFunction(...)` construction in every
// kernel branch, and the `fWgSet = new SVWorkingSet(...)` creation before
// training.  All surviving code lines are kept unchanged below.
{
   Data()->SetCurrentType(Types::kTraining);

   Log() << kDEBUG << "Create event vector"<< Endl;

   // event counts used to balance the per-class cost parameters
   fDataSize = Data()->GetNEvents();
   Int_t nSignal = Data()->GetNEvtSigTrain();
   Int_t nBackground = Data()->GetNEvtBkgdTrain();
   Double_t CSig;   // cost applied to signal events
   Double_t CBkg;   // cost applied to background events

   // Use number of signal and background from above to weight the cost parameter
   // so that the training is not biased towards the larger dataset when the signal
   // and background samples are significantly different sizes.
   if(nSignal < nBackground){
      CSig = fCost;
      CBkg = CSig*((double)nSignal/nBackground);
   }
   else{
      // NOTE(review): this branch scales CSig *up* by nSignal/nBackground,
      // while the branch above scales the larger class *down* — confirm the
      // asymmetry against the intended inverse-frequency weighting.
      CBkg = fCost;
      CSig = CBkg*((double)nSignal/nBackground);
   }

   // Loop over events and assign the correct cost parameter.
   // Events with zero weight are skipped entirely.
   for (Int_t ievnt=0; ievnt<Data()->GetNEvents(); ievnt++){
      if (GetEvent(ievnt)->GetWeight() != 0){
         if(DataInfo().IsSignal(GetEvent(ievnt))){
            fInputData->push_back(new SVEvent(GetEvent(ievnt), CSig, DataInfo().IsSignal\
(GetEvent(ievnt))));
         }
         else{
            fInputData->push_back(new SVEvent(GetEvent(ievnt), CBkg, DataInfo().IsSignal\
(GetEvent(ievnt))));
         }
      }
   }

   // Set the correct kernel function.
   // Here we only use valid Mercer kernels. In the literature some people have reported reasonable
   // results using Sigmoid kernel function however that is not a valid Mercer kernel and is not used here.
   if( fTheKernel == "RBF"){
      // [extraction lost the SVKernelFunction construction for the RBF kernel]
   }
   else if( fTheKernel == "MultiGauss" ){
      if(fGammas!=""){
         // [extraction lost: parse fGammas into fmGamma / record fGammaList]
      }
      else{
         if(fmGamma.size()!=0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
         else{
            // no gammas given: default every per-variable gamma to 1.0
            for(Int_t ngammas=0; ngammas<fNumVars; ++ngammas){
               fmGamma.push_back(1.0);
            }
            // [extraction lost a line here]
         }
      }
      // [extraction lost the SVKernelFunction construction for MultiGauss]
   }
   else if( fTheKernel == "Polynomial" ){
      // [extraction lost the SVKernelFunction construction for Polynomial]
   }
   else if( fTheKernel == "Prod" ){
      if(fGammas!=""){
         // [extraction lost: parse fGammas into fmGamma]
      }
      else{
         if(fmGamma.size()!=0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
      }
      // [extraction lost the product-kernel construction]
   }
   else if( fTheKernel == "Sum" ){
      if(fGammas!=""){
         // [extraction lost: parse fGammas into fmGamma]
      }
      else{
         if(fmGamma.size()!=0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
      }
      // [extraction lost the sum-kernel construction]
   }
   else {
      Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
      exit(1);
   }

   Log()<< kINFO << "Building SVM Working Set...with "<<fInputData->size()<<" event instances"<< Endl;
   Timer bldwstime( GetName());
   // [extraction lost the SVWorkingSet construction assigning fWgSet]
   Log() << kINFO <<"Elapsed time for Working Set build: "<< bldwstime.GetElapsedTime()<<Endl;

   // timing
   Timer timer( GetName() );
   Log() << kINFO << "Sorry, no computing time forecast available for SVM, please wait ..." << Endl;

   if (fInteractive) fWgSet->SetIPythonInteractive(&fExitFromTraining, &fIPyCurrentIter);

   // run the SMO optimisation
   fWgSet->Train(fMaxIter);

   Log() << kINFO << "Elapsed time: " << timer.GetElapsedTime()
         << " " << Endl;

   // keep the trained state; the working set itself is no longer needed
   fBparm = fWgSet->GetBpar();
   fSupportVectors = fWgSet->GetSupportVectors();
   delete fWgSet;
   fWgSet=0;

}
393
394////////////////////////////////////////////////////////////////////////////////
395/// write configuration to xml file
396
397void TMVA::MethodSVM::AddWeightsXMLTo( void* parent ) const
398{
399 void* wght = gTools().AddChild(parent, "Weights");
400 gTools().AddAttr(wght,"fBparm",fBparm);
401 gTools().AddAttr(wght,"fGamma",fGamma);
402 gTools().AddAttr(wght,"fGammaList",fGammaList);
403 gTools().AddAttr(wght,"fTheta",fTheta);
404 gTools().AddAttr(wght,"fOrder",fOrder);
405 gTools().AddAttr(wght,"NSupVec",fSupportVectors->size());
406
407 for (std::vector<TMVA::SVEvent*>::iterator veciter=fSupportVectors->begin();
408 veciter!=fSupportVectors->end() ; ++veciter ) {
409 TVectorD temp(GetNvar()+4);
410 temp[0] = (*veciter)->GetNs();
411 temp[1] = (*veciter)->GetTypeFlag();
412 temp[2] = (*veciter)->GetAlpha();
413 temp[3] = (*veciter)->GetAlpha_p();
414 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
415 temp[ivar+4] = (*(*veciter)->GetDataVector())[ivar];
416 gTools().WriteTVectorDToXML(wght,"SupportVector",&temp);
417 }
418 // write max/min data values
419 void* maxnode = gTools().AddChild(wght, "Maxima");
420 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
421 gTools().AddAttr(maxnode, "Var"+gTools().StringFromInt(ivar), GetXmax(ivar));
422 void* minnode = gTools().AddChild(wght, "Minima");
423 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
424 gTools().AddAttr(minnode, "Var"+gTools().StringFromInt(ivar), GetXmin(ivar));
425}
426
427////////////////////////////////////////////////////////////////////////////////
428
// NOTE(review): the extraction dropped the signature line here (upstream:
// `void TMVA::MethodSVM::ReadWeightsFromXML( void* wghtnode )`) as well as
// the kernel-function construction in each branch at the bottom.  All
// surviving code lines are kept unchanged below.
{
   // read back the scalar configuration written by AddWeightsXMLTo()
   gTools().ReadAttr( wghtnode, "fBparm",fBparm );
   gTools().ReadAttr( wghtnode, "fGamma",fGamma);
   gTools().ReadAttr( wghtnode, "fGammaList",fGammaList);
   gTools().ReadAttr( wghtnode, "fOrder",fOrder);
   gTools().ReadAttr( wghtnode, "fTheta",fTheta);
   UInt_t fNsupv=0;
   gTools().ReadAttr( wghtnode, "NSupVec",fNsupv );

   Float_t alpha=0.;
   Float_t alpha_p = 0.;

   Int_t typeFlag=-1;
   // UInt_t ns = 0;
   std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());

   // (re)allocate the normalisation extrema and the support-vector store
   if (fMaxVars!=0) delete fMaxVars;
   fMaxVars = new TVectorD( GetNvar() );
   if (fMinVars!=0) delete fMinVars;
   fMinVars = new TVectorD( GetNvar() );
   if (fSupportVectors!=0) {
      for (vector< SVEvent* >::iterator it = fSupportVectors->begin(); it!=fSupportVectors->end(); ++it)
         delete *it;
      delete fSupportVectors;
   }
   fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
   void* supportvectornode = gTools().GetChild(wghtnode);
   // each SupportVector node is a TVectorD: [ns, typeFlag, alpha, alpha_p, x_0..x_{n-1}]
   for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
      TVectorD temp(GetNvar()+4);
      gTools().ReadTVectorDFromXML(supportvectornode,"SupportVector",&temp);
      // ns=(UInt_t)temp[0];
      typeFlag=(int)temp[1];
      alpha=temp[2];
      alpha_p=temp[3];
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) (*svector)[ivar]=temp[ivar+4];

      fSupportVectors->push_back(new SVEvent(svector,alpha,alpha_p,typeFlag));
      supportvectornode = gTools().GetNextChild(supportvectornode);
   }

   // after the support vectors come the Maxima and Minima nodes
   void* maxminnode = supportvectornode;
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().ReadAttr( maxminnode,"Var"+gTools().StringFromInt(ivar),(*fMaxVars)[ivar]);
   maxminnode = gTools().GetNextChild(maxminnode);
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().ReadAttr( maxminnode,"Var"+gTools().StringFromInt(ivar),(*fMinVars)[ivar]);
   // rebuild the kernel function matching the stored configuration
   if( fTheKernel == "RBF" ){
      // [extraction lost the SVKernelFunction construction]
   }
   else if( fTheKernel == "MultiGauss" ){
      // [extraction lost the SetMGamma / SVKernelFunction construction]
   }
   else if( fTheKernel == "Polynomial" ){
      // [extraction lost the SVKernelFunction construction]
   }
   else if( fTheKernel == "Prod" ){
      // [extraction lost the SetMGamma / SVKernelFunction construction]
   }
   else if( fTheKernel == "Sum" ){
      // [extraction lost the SetMGamma / SVKernelFunction construction]
   }
   else {
      Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
      exit(1);
   }
   delete svector;
}
501
502////////////////////////////////////////////////////////////////////////////////
503///TODO write IT
504/// write training sample (TTree) to file
505
509
510////////////////////////////////////////////////////////////////////////////////
511
// NOTE(review): the extraction dropped the signature line here (upstream:
// `void TMVA::MethodSVM::ReadWeightsFromStream( std::istream& istr )`), the
// kernel construction in the "Gauss" branch, and the declaration of the
// compatibility kernel-type variable `k` used below.  Surviving code lines
// are kept unchanged.
{
   if (fSupportVectors !=0) { delete fSupportVectors; fSupportVectors = 0;}
   fSupportVectors = new std::vector<TMVA::SVEvent*>(0);

   // read configuration from input stream
   istr >> fBparm;

   UInt_t fNsupv;
   // coverity[tainted_data_argument]
   istr >> fNsupv;
   fSupportVectors->reserve(fNsupv);

   // typeTalpha encodes sign(type flag) and alpha in a single number
   Float_t typeTalpha=0.;
   Float_t alpha=0.;
   Int_t typeFlag=-1;
   UInt_t ns = 0;
   std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());

   fMaxVars = new TVectorD( GetNvar() );
   fMinVars = new TVectorD( GetNvar() );

   // one line per support vector: ns, typeTalpha, then the input values
   for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
      istr>>ns;
      istr>>typeTalpha;
      typeFlag = typeTalpha<0?-1:1;
      alpha = typeTalpha<0?-typeTalpha:typeTalpha;
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> svector->at(ivar);

      fSupportVectors->push_back(new SVEvent(svector,alpha,typeFlag,ns));
   }

   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMaxVars)[ivar];

   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMinVars)[ivar];

   delete fSVKernelFunction;
   if (fTheKernel == "Gauss" ) {
      // [extraction lost the SVKernelFunction construction]
   }
   else {
      // [extraction lost the declaration of `k` and the SVKernelFunction construction]
      if(fTheKernel == "Linear") k = SVKernelFunction::kLinear;
      else if (fTheKernel == "Polynomial") k = SVKernelFunction::kPolynomial;
      else if (fTheKernel == "Sigmoid" ) k = SVKernelFunction::kSigmoidal;
      else {
         Log() << kFATAL <<"Unknown kernel function found in weight file!" << Endl;
      }
      fSVKernelFunction->setCompatibilityParams(k, fOrder, fTheta, fKappa);
   }
   delete svector;
}
565
566////////////////////////////////////////////////////////////////////////////////
567/// TODO write IT
568
570{
571}
572
573////////////////////////////////////////////////////////////////////////////////
574/// returns MVA value for given event
575
577{
578 Double_t myMVA = 0;
579
580 // TODO: avoid creation of a new SVEvent every time (Joerg)
581 SVEvent* ev = new SVEvent( GetEvent(), 0. ); // check for specificators
582
583 for (UInt_t ievt = 0; ievt < fSupportVectors->size() ; ievt++) {
584 myMVA += ( fSupportVectors->at(ievt)->GetAlpha()
585 * fSupportVectors->at(ievt)->GetTypeFlag()
586 * fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
587 }
588
589 delete ev;
590
591 myMVA -= fBparm;
592
593 // cannot determine error
594 NoErrorCalc(err, errUpper);
595
596 // 08/12/09: changed sign here to make results agree with convention signal=1
597 return 1.0/(1.0 + TMath::Exp(myMVA));
598}
599////////////////////////////////////////////////////////////////////////////////
600
601const std::vector<Float_t>& TMVA::MethodSVM::GetRegressionValues()
602{
603 if( fRegressionReturnVal == NULL )
604 fRegressionReturnVal = new std::vector<Float_t>();
605 fRegressionReturnVal->clear();
606
607 Double_t myMVA = 0;
608
609 const Event *baseev = GetEvent();
610 SVEvent* ev = new SVEvent( baseev,0. ); //check for specificators
611
612 for (UInt_t ievt = 0; ievt < fSupportVectors->size() ; ievt++) {
613 myMVA += ( fSupportVectors->at(ievt)->GetDeltaAlpha()
614 *fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
615 }
616 myMVA += fBparm;
617 Event * evT = new Event(*baseev);
618 evT->SetTarget(0,myMVA);
619
620 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
621
622 fRegressionReturnVal->push_back(evT2->GetTarget(0));
623
624 delete evT;
625
626 delete ev;
627
628 return *fRegressionReturnVal;
629}
630
631////////////////////////////////////////////////////////////////////////////////
632/// write specific classifier response
633
void TMVA::MethodSVM::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   // Emit standalone C++ implementing the trained (RBF-kernel) classifier:
   // member declarations, Initialize(), GetMvaValue__(), Clear(), and the
   // static support-vector / alpha tables.
   // NOTE(review): the emitted GetMvaValue__ always uses the RBF form
   // exp(-fGamma * |sv - x|^2) regardless of fTheKernel — confirm intended.
   const int fNsupv = fSupportVectors->size();
   fout << " // not implemented for class: \"" << className << "\"" << std::endl;
   fout << " float fBparameter;" << std::endl;
   fout << " int fNOfSuppVec;" << std::endl;
   fout << " static float fAllSuppVectors[][" << fNsupv << "];" << std::endl;
   fout << " static float fAlphaTypeCoef[" << fNsupv << "];" << std::endl;
   fout << std::endl;
   fout << " // Kernel parameter(s) " << std::endl;
   fout << " float fGamma;" << std::endl;
   fout << "};" << std::endl;
   fout << "" << std::endl;

   //Initialize function definition
   fout << "inline void " << className << "::Initialize() " << std::endl;
   fout << "{" << std::endl;
   fout << " fBparameter = " << fBparm << ";" << std::endl;
   fout << " fNOfSuppVec = " << fNsupv << ";" << std::endl;
   fout << " fGamma = " << fGamma << ";" <<std::endl;
   fout << "}" << std::endl;
   fout << std::endl;

   // GetMvaValue__ function definition
   fout << "inline double " << className << "::GetMvaValue__(const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << " double mvaval = 0; " << std::endl;
   fout << " double temp = 0; " << std::endl;
   fout << std::endl;
   fout << " for (int ievt = 0; ievt < fNOfSuppVec; ievt++ ){" << std::endl;
   fout << " temp = 0;" << std::endl;
   fout << " for ( unsigned int ivar = 0; ivar < GetNvar(); ivar++ ) {" << std::endl;
   // squared Euclidean distance between support vector and input
   fout << " temp += (fAllSuppVectors[ivar][ievt] - inputValues[ivar]) " << std::endl;
   fout << " * (fAllSuppVectors[ivar][ievt] - inputValues[ivar]); " << std::endl;
   fout << " }" << std::endl;
   fout << " mvaval += fAlphaTypeCoef[ievt] * exp( -fGamma * temp ); " << std::endl;

   fout << " }" << std::endl;
   fout << " mvaval -= fBparameter;" << std::endl;
   fout << " return 1./(1. + exp(mvaval));" << std::endl;
   fout << "}" << std::endl;
   fout << "// Clean up" << std::endl;
   fout << "inline void " << className << "::Clear() " << std::endl;
   fout << "{" << std::endl;
   fout << " // nothing to clear " << std::endl;
   fout << "}" << std::endl;
   fout << "" << std::endl;

   // define support vectors
   // alpha_i * y_i coefficients, one per support vector
   fout << "float " << className << "::fAlphaTypeCoef[] =" << std::endl;
   fout << "{ ";
   for (Int_t isv = 0; isv < fNsupv; isv++) {
      fout << fSupportVectors->at(isv)->GetDeltaAlpha() * fSupportVectors->at(isv)->GetTypeFlag();
      if (isv < fNsupv-1) fout << ", ";
   }
   fout << " };" << std::endl << std::endl;

   // support-vector table, indexed [variable][support vector]
   fout << "float " << className << "::fAllSuppVectors[][" << fNsupv << "] =" << std::endl;
   fout << "{";
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
      fout << std::endl;
      fout << " { ";
      for (Int_t isv = 0; isv < fNsupv; isv++){
         fout << fSupportVectors->at(isv)->GetDataVector()->at(ivar);
         if (isv < fNsupv-1) fout << ", ";
      }
      fout << " }";
      if (ivar < GetNvar()-1) fout << ", " << std::endl;
      else fout << std::endl;
   }
   fout << "};" << std::endl<< std::endl;
}
707
708////////////////////////////////////////////////////////////////////////////////
709/// get help message text
710///
711/// typical length of text line:
712/// "|--------------------------------------------------------------|"
713
715{
716 Log() << Endl;
717 Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
718 Log() << Endl;
719 Log() << "The Support Vector Machine (SVM) builds a hyperplane separating" << Endl;
720 Log() << "signal and background events (vectors) using the minimal subset of " << Endl;
721 Log() << "all vectors used for training (support vectors). The extension to" << Endl;
722 Log() << "the non-linear case is performed by mapping input vectors into a " << Endl;
723 Log() << "higher-dimensional feature space in which linear separation is " << Endl;
724 Log() << "possible. The use of the kernel functions thereby eliminates the " << Endl;
725 Log() << "explicit transformation to the feature space. The implemented SVM " << Endl;
726 Log() << "algorithm performs the classification tasks using linear, polynomial, " << Endl;
727 Log() << "Gaussian and sigmoidal kernel functions. The Gaussian kernel allows " << Endl;
728 Log() << "to apply any discriminant shape in the input space." << Endl;
729 Log() << Endl;
730 Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
731 Log() << Endl;
732 Log() << "SVM is a general purpose non-linear classification method, which " << Endl;
733 Log() << "does not require data preprocessing like decorrelation or Principal " << Endl;
734 Log() << "Component Analysis. It generalises quite well and can handle analyses " << Endl;
735 Log() << "with large numbers of input variables." << Endl;
736 Log() << Endl;
737 Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
738 Log() << Endl;
739 Log() << "Optimal performance requires primarily a proper choice of the kernel " << Endl;
740 Log() << "parameters (the width \"Sigma\" in case of Gaussian kernel) and the" << Endl;
741 Log() << "cost parameter \"C\". The user must optimise them empirically by running" << Endl;
742 Log() << "SVM several times with different parameter sets. The time needed for " << Endl;
743 Log() << "each evaluation scales like the square of the number of training " << Endl;
744 Log() << "events so that a coarse preliminary tuning should be performed on " << Endl;
745 Log() << "reduced data sets." << Endl;
746}
747
748////////////////////////////////////////////////////////////////////////////////
749/// Optimize Tuning Parameters
750/// This is used to optimise the kernel function parameters and cost. All kernel parameters
751/// are optimised by default with default ranges, however the parameters to be optimised can
752/// be set when booking the method with the option Tune.
753///
754/// Example:
755///
756/// "Tune=Gamma[0.01;1.0;100]" would only tune the RBF Gamma between 0.01 and 1.0
757/// with 100 steps.
758
std::map<TString,Double_t> TMVA::MethodSVM::OptimizeTuningParameters(TString fomType, TString fitType)
{
   // Call the Optimizer with the set of kernel parameters and ranges that are meant to be tuned.
   std::map< TString,std::vector<Double_t> > optVars;
   // Get parameters and options specified in booking of method.
   if(fTune != "All"){
      optVars= GetTuningOptions();
   }
   std::map< TString,std::vector<Double_t> >::iterator iter;
   // Fill all the tuning parameters that should be optimized into a map
   std::map<TString,TMVA::Interval*> tuneParameters;
   std::map<TString,Double_t> tunedParameters;
   // Note: the 3rd parameter in the interval is the "number of bins", NOT the stepsize!!
   // The actual values are always read from the middle of the bins.
   // NOTE(review): the Interval objects new'ed below are never deleted — leak
   // on every call; also exit(1) on a bad option kills the whole process.
   Log() << kINFO << "Using the " << fTheKernel << " kernel." << Endl;
   // Setup map of parameters based on the specified options or defaults.
   if( fTheKernel == "RBF" ){
      if(fTune == "All"){
         tuneParameters.insert(std::pair<TString,Interval*>("Gamma",new Interval(0.01,1.,100)));
         tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
      }
      else{
         // user-restricted tuning: only Gamma and C are legal for RBF
         for(iter=optVars.begin(); iter!=optVars.end(); ++iter){
            if( iter->first == "Gamma" || iter->first == "C"){
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
            }
            else{
               Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
               exit(1);
            }
         }
      }
   }
   else if( fTheKernel == "Polynomial" ){
      if (fTune == "All"){
         tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
         tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.01,1.,100)));
         tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
      }
      else{
         for(iter=optVars.begin(); iter!=optVars.end(); ++iter){
            if( iter->first == "Theta" || iter->first == "C"){
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
            }
            else if( iter->first == "Order"){
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
            }
            else{
               Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
               exit(1);
            }
         }
      }
   }
   else if( fTheKernel == "MultiGauss" ){
      if (fTune == "All"){
         // one "Gamma_<varname>" interval per input variable
         for(int i=0; i<fNumVars; i++){
            stringstream s;
            s << fVarNames.at(i);
            string str = "Gamma_" + s.str();
            tuneParameters.insert(std::pair<TString,Interval*>(str,new Interval(0.01,1.,100)));
         }
         tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
      } else {
         for(iter=optVars.begin(); iter!=optVars.end(); ++iter){
            if( iter->first == "GammaList"){
               // a single GammaList range is expanded to all per-variable gammas
               for(int j=0; j<fNumVars; j++){
                  stringstream s;
                  s << fVarNames.at(j);
                  string str = "Gamma_" + s.str();
                  tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
               }
            }
            else if( iter->first == "C"){
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
            }
            else{
               Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
               exit(1);
            }
         }
      }
   }
   else if( fTheKernel == "Prod" ){
      // product kernel: collect the parameters of every factor in fMultiKernels
      std::stringstream tempstring(fMultiKernels);
      std::string value;
      while (std::getline(tempstring,value,'*')){
         if(value == "RBF"){
            tuneParameters.insert(std::pair<TString,Interval*>("Gamma",new Interval(0.01,1.,100)));
         }
         else if(value == "MultiGauss"){
            for(int i=0; i<fNumVars; i++){
               stringstream s;
               s << fVarNames.at(i);
               string str = "Gamma_" + s.str();
               tuneParameters.insert(std::pair<TString,Interval*>(str,new Interval(0.01,1.,100)));
            }
         }
         else if(value == "Polynomial"){
            tuneParameters.insert(std::pair<TString,Interval*>("Order",new Interval(1,10,10)));
            tuneParameters.insert(std::pair<TString,Interval*>("Theta",new Interval(0.0,1.0,101)));
         }
         else {
            Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
            exit(1);
         }
      }
      tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
   }
   else if( fTheKernel == "Sum" ){
      // sum kernel: same expansion, '+'-separated
      std::stringstream tempstring(fMultiKernels);
      std::string value;
      while (std::getline(tempstring,value,'+')){
         if(value == "RBF"){
            tuneParameters.insert(std::pair<TString,Interval*>("Gamma",new Interval(0.01,1.,100)));
         }
         else if(value == "MultiGauss"){
            for(int i=0; i<fNumVars; i++){
               stringstream s;
               s << fVarNames.at(i);
               string str = "Gamma_" + s.str();
               tuneParameters.insert(std::pair<TString,Interval*>(str,new Interval(0.01,1.,100)));
            }
         }
         else if(value == "Polynomial"){
            tuneParameters.insert(std::pair<TString,Interval*>("Order",new Interval(1,10,10)));
            tuneParameters.insert(std::pair<TString,Interval*>("Theta",new Interval(0.0,1.0,101)));
         }
         else {
            Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
            exit(1);
         }
      }
      tuneParameters.insert(std::pair<TString,Interval*>("C",new Interval(0.01,1.,100)));
   }
   else {
      Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
      exit(1);
   }
   Log() << kINFO << " the following SVM parameters will be tuned on the respective *grid*\n" << Endl;
   std::map<TString,TMVA::Interval*>::iterator it;
   for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
      Log() << kWARNING << it->first <<Endl;
      std::ostringstream oss;
      (it->second)->Print(oss);
      Log()<<oss.str();
      Log()<<Endl;
   }
   // run the optimisation and hand the winning point back to the caller
   OptimizeConfigParameters optimize(this, tuneParameters, fomType, fitType);
   tunedParameters=optimize.optimize();

   return tunedParameters;

}
913
914////////////////////////////////////////////////////////////////////////////////
915/// Set the tuning parameters according to the argument
916void TMVA::MethodSVM::SetTuneParameters(std::map<TString,Double_t> tuneParameters)
917{
918 std::map<TString,Double_t>::iterator it;
919 if( fTheKernel == "RBF" ){
920 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
921 Log() << kWARNING << it->first << " = " << it->second << Endl;
922 if (it->first == "Gamma"){
923 SetGamma (it->second);
924 }
925 else if(it->first == "C"){
926 SetCost (it->second);
927 }
928 else {
929 Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
930 }
931 }
932 }
933 else if( fTheKernel == "MultiGauss" ){
934 fmGamma.clear();
935 for(int i=0; i<fNumVars; i++){
936 stringstream s;
937 s << fVarNames.at(i);
938 string str = "Gamma_" + s.str();
939 Log() << kWARNING << tuneParameters.find(str)->first << " = " << tuneParameters.find(str)->second << Endl;
940 fmGamma.push_back(tuneParameters.find(str)->second);
941 }
942 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
943 if (it->first == "C"){
944 Log() << kWARNING << it->first << " = " << it->second << Endl;
945 SetCost(it->second);
946 break;
947 }
948 }
949 }
950 else if( fTheKernel == "Polynomial" ){
951 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
952 Log() << kWARNING << it->first << " = " << it->second << Endl;
953 if (it->first == "Order"){
954 SetOrder(it->second);
955 }
956 else if (it->first == "Theta"){
957 SetTheta(it->second);
958 }
959 else if(it->first == "C"){ SetCost (it->second);
960 }
961 else if(it->first == "Mult"){
962 SetMult(it->second);
963 }
964 else{
965 Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
966 }
967 }
968 }
969 else if( fTheKernel == "Prod" || fTheKernel == "Sum"){
970 fmGamma.clear();
971 for(it=tuneParameters.begin(); it!=tuneParameters.end(); ++it){
972 bool foundParam = false;
973 Log() << kWARNING << it->first << " = " << it->second << Endl;
974 for(int i=0; i<fNumVars; i++){
975 stringstream s;
976 s << fVarNames.at(i);
977 string str = "Gamma_" + s.str();
978 if(it->first == str){
979 fmGamma.push_back(it->second);
980 foundParam = true;
981 }
982 }
983 if (it->first == "Gamma"){
984 SetGamma (it->second);
985 foundParam = true;
986 }
987 else if (it->first == "Order"){
988 SetOrder (it->second);
989 foundParam = true;
990 }
991 else if (it->first == "Theta"){
992 SetTheta (it->second);
993 foundParam = true;
994 }
995 else if (it->first == "C"){ SetCost (it->second);
996 SetCost (it->second);
997 foundParam = true;
998 }
999 else{
1000 if(!foundParam){
1001 Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
1002 }
1003 }
1004 }
1005 }
1006 else {
1007 Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
1008 exit(1);
1009 }
1010}
1011
1012////////////////////////////////////////////////////////////////////////////////
1013/// Takes as input a string of values for multigaussian gammas and splits it, filling the
1014/// gamma vector required by the SVKernelFunction. Example: "GammaList=0.1,0.2,0.3" would
1015/// make a vector with Gammas of 0.1,0.2 & 0.3 corresponding to input variables 1,2 & 3
1016/// respectively.
1017void TMVA::MethodSVM::SetMGamma(std::string & mg){
1018 std::stringstream tempstring(mg);
1019 Float_t value;
1020 while (tempstring >> value){
1021 fmGamma.push_back(value);
1022
1023 if (tempstring.peek() == ','){
1024 tempstring.ignore();
1025 }
1026 }
1027}
1028
1029////////////////////////////////////////////////////////////////////////////////
1030/// Produces GammaList string for multigaussian kernel to be written to xml file
1031void TMVA::MethodSVM::GetMGamma(const std::vector<float> & gammas){
1032 std::ostringstream tempstring;
1033 for(UInt_t i = 0; i<gammas.size(); ++i){
1034 tempstring << gammas.at(i);
1035 if(i!=(gammas.size()-1)){
1036 tempstring << ",";
1037 }
1038 }
1039 fGammaList= tempstring.str();
1040}
1041
1042////////////////////////////////////////////////////////////////////////////////
1043/// MakeKernelList
1044/// Function providing string manipulation for product or sum of kernels functions
1045/// to take list of kernels specified in the booking of the method and provide a vector
1046/// of SV kernels to iterate over in SVKernelFunction.
1047///
1048/// Example:
1049///
1050/// "KernelList=RBF*Polynomial" would use a product of the RBF and Polynomial
1051/// kernels.
1052
1053std::vector<TMVA::SVKernelFunction::EKernelType> TMVA::MethodSVM::MakeKernelList(std::string multiKernels, TString kernel)
1054{
1055 std::vector<TMVA::SVKernelFunction::EKernelType> kernelsList;
1056 std::stringstream tempstring(multiKernels);
1057 std::string value;
1058 if(kernel=="Prod"){
1059 while (std::getline(tempstring,value,'*')){
1060 if(value == "RBF"){ kernelsList.push_back(SVKernelFunction::kRBF);}
1061 else if(value == "MultiGauss"){
1062 kernelsList.push_back(SVKernelFunction::kMultiGauss);
1063 if(fGammas!=""){
1065 }
1066 }
1067 else if(value == "Polynomial"){ kernelsList.push_back(SVKernelFunction::kPolynomial);}
1068 else {
1069 Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
1070 exit(1);
1071 }
1072 }
1073 }
1074 else if(kernel=="Sum"){
1075 while (std::getline(tempstring,value,'+')){
1076 if(value == "RBF"){ kernelsList.push_back(SVKernelFunction::kRBF);}
1077 else if(value == "MultiGauss"){
1078 kernelsList.push_back(SVKernelFunction::kMultiGauss);
1079 if(fGammas!=""){
1081 }
1082 }
1083 else if(value == "Polynomial"){ kernelsList.push_back(SVKernelFunction::kPolynomial);}
1084 else {
1085 Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
1086 exit(1);
1087 }
1088 }
1089 }
1090 else {
1091 Log() << kWARNING << "Unable to split MultiKernels. Delimiters */+ required." << Endl;
1092 exit(1);
1093 }
1094 return kernelsList;
1095}
1096
1097////////////////////////////////////////////////////////////////////////////////
1098/// GetTuningOptions
1099/// Function to allow for ranges and number of steps (for scan) when optimising kernel
1100/// function parameters. Specified when booking the method after the parameter to be
1101/// optimised between square brackets with each value separated by ;, the first value
1102/// is the lower limit, the second the upper limit and the third is the number of steps.
1103 /// Example: "Tune=Gamma[0.01;1.0;100]" would tune only the RBF Gamma, scanning
1104 /// it between 0.01 and 1.0 in 100 steps.
std::map< TString,std::vector<Double_t> > TMVA::MethodSVM::GetTuningOptions()
{
   // Parse fTune (e.g. "Gamma[0.01;1.0;100],C[0.1;10;50]") into a map from
   // parameter name to {lower limit, upper limit, number of steps}.
   std::map< TString,std::vector<Double_t> > optVars;
   std::stringstream tempstring(fTune);
   std::string value;
   while (std::getline(tempstring,value,',')){
      // Locate the bracketed interval specification "name[lo;hi;steps]".
      // NOTE(review): if '[' is absent, find() returns npos and npos+1 wraps to
      // 0 in the unsigned, so a malformed entry is parsed in a surprising way -
      // confirm inputs are validated upstream.
      unsigned first = value.find('[')+1;
      unsigned last = value.find_last_of(']');
      std::string optParam = value.substr(0,first-1);
      std::stringstream strNew (value.substr(first,last-first));
      Double_t optInterval;
      std::vector<Double_t> tempVec;
      UInt_t i = 0;
      // Read the ';'-separated numbers inside the brackets; i counts them.
      while (strNew >> optInterval){
         tempVec.push_back(optInterval);
         if (strNew.peek() == ';'){
            strNew.ignore();
         }
         ++i;
      }
      // If fewer than three numbers were given, pad with this parameter's
      // defaults. The switch cases intentionally fall through so that every
      // missing trailing entry is appended in order (lo, hi, steps).
      if(i != 3 && i == tempVec.size()){
         if(optParam == "C" || optParam == "Gamma" || optParam == "GammaList" || optParam == "Theta"){
            switch(i){
            case 0:
               tempVec.push_back(0.01);
               // intentional fall-through
            case 1:
               tempVec.push_back(1.);
               // intentional fall-through
            case 2:
               tempVec.push_back(100);
            }
         }
         else if(optParam == "Order"){
            switch(i){
            case 0:
               tempVec.push_back(1);
               // intentional fall-through
            case 1:
               tempVec.push_back(10);
               // intentional fall-through
            case 2:
               tempVec.push_back(10);
            }
         }
         else{
            // Unknown parameter name: abort, as elsewhere in this method class.
            Log() << kWARNING << optParam << " is not a recognised tuneable parameter." << Endl;
            exit(1);
         }
      }
      optVars.insert(std::pair<TString,std::vector<Double_t> >(optParam,tempVec));
   }
   return optVars;
}
1155
1156////////////////////////////////////////////////////////////////////////////////
1157/// getLoss
1158/// Calculates loss for testing dataset. The loss function can be specified when
1159 /// booking the method, otherwise defaults to hinge loss. Currently not used;
1160 /// however, it is accessible if required.
1161
1163 Double_t loss = 0.0;
1164 Double_t sumW = 0.0;
1165 Double_t temp = 0.0;
1166 Data()->SetCurrentType(Types::kTesting);
1168 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1169 const Event* ev = GetEvent(ievt);
1170 Float_t v = (*mvaRes)[ievt][0];
1171 Float_t w = ev->GetWeight();
1172 if(DataInfo().IsSignal(ev)){
1173 if(lossFunction == "hinge"){
1174 temp += w*(1-v);
1175 }
1176 else if(lossFunction == "exp"){
1177 temp += w*TMath::Exp(-v);
1178 }
1179 else if(lossFunction == "binomial"){
1180 temp += w*TMath::Log(1+TMath::Exp(-2*v));
1181 }
1182 else{
1183 Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
1184 exit(1);
1185 }
1186 }
1187 else{
1188 if(lossFunction == "hinge"){
1189 temp += w*v;
1190 }
1191 else if(lossFunction == "exp"){
1192 temp += w*TMath::Exp(-(1-v));
1193 }
1194 else if(lossFunction == "binomial"){
1195 temp += w*TMath::Log(1+TMath::Exp(-2*(1-v)));
1196 }
1197 else{
1198 Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
1199 exit(1);
1200 }
1201 }
1202 sumW += w;
1203 }
1204 loss = temp/sumW;
1205
1206 return loss;
1207}
#define REGISTER_METHOD(CLASS)
for example
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
int Int_t
Signed integer 4 bytes (int).
Definition RtypesCore.h:59
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
Definition RtypesCore.h:60
bool Bool_t
Boolean (0=false, 1=true) (bool).
Definition RtypesCore.h:77
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
float Float_t
Float 4 bytes (float).
Definition RtypesCore.h:71
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
Double_t err
TVectorT< Double_t > TVectorD
Definition TVectorDfwd.h:23
A file, usually with extension .root, that stores data and code in the form of serialized objects in ...
Definition TFile.h:130
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
MsgLogger & Log() const
Class that contains all the data information.
Definition DataSetInfo.h:62
std::vector< VariableInfo > & GetVariableInfos()
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition Event.cxx:367
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition Event.cxx:389
Float_t GetTarget(UInt_t itgt) const
Definition Event.h:102
The TMVA::Interval Class.
Definition Interval.h:61
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
const char * GetName() const override
Definition MethodBase.h:337
Double_t GetXmin(Int_t ivar) const
Definition MethodBase.h:359
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
TString GetMethodTypeName() const
Definition MethodBase.h:335
Types::EAnalysisType GetAnalysisType() const
Definition MethodBase.h:440
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition MethodBase.h:689
const TString & GetMethodName() const
Definition MethodBase.h:334
void ExitFromTraining()
Definition MethodBase.h:467
UInt_t GetNEvents() const
Definition MethodBase.h:419
Bool_t DoRegression() const
Definition MethodBase.h:441
std::vector< Float_t > * fRegressionReturnVal
Definition MethodBase.h:600
const Event * GetEvent() const
Definition MethodBase.h:754
DataSetInfo & DataInfo() const
Definition MethodBase.h:413
void SetNormalised(Bool_t norm)
Definition MethodBase.h:500
UInt_t GetNvar() const
Definition MethodBase.h:347
Double_t GetXmax(Int_t ivar) const
Definition MethodBase.h:360
UInt_t fIPyCurrentIter
Definition MethodBase.h:453
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition MethodBase.h:397
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
DataSet * Data() const
Definition MethodBase.h:412
IPythonInteractive * fInteractive
Definition MethodBase.h:451
Double_t getLoss(TString lossFunction)
getLoss Calculates loss for testing dataset.
Float_t fTolerance
tolerance parameter
Definition MethodSVM.h:134
TVectorD * fMaxVars
for normalization //is it still needed??
Definition MethodSVM.h:145
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
SVM can handle classification with 2 classes and regression with one regression-target.
void Init(void) override
default initialisation
void Reset(void) override
void ReadWeightsFromXML(void *wghtnode) override
TVectorD * fMinVars
for normalization //is it still needed??
Definition MethodSVM.h:144
void Train(void) override
Train SVM.
std::vector< TString > fVarNames
Definition MethodSVM.h:156
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
void WriteWeightsToStream(TFile &fout) const
TODO write IT write training sample (TTree) to file.
void SetMGamma(std::string &mg)
Takes as input a string of values for multigaussian gammas and splits it, filling the gamma vector re...
void SetCost(Double_t c)
Definition MethodSVM.h:108
SVKernelFunction * fSVKernelFunction
kernel function
Definition MethodSVM.h:142
Float_t fBparm
free plane coefficient
Definition MethodSVM.h:137
void GetHelpMessage() const override
get help message text
Float_t fDoubleSigmaSquared
for RBF Kernel
Definition MethodSVM.h:149
void SetTuneParameters(std::map< TString, Double_t > tuneParameters) override
Set the tuning parameters according to the argument.
void GetMGamma(const std::vector< float > &gammas)
Produces GammaList string for multigaussian kernel to be written to xml file.
Float_t fNumVars
number of input variables for multi-gaussian
Definition MethodSVM.h:155
void AddWeightsXMLTo(void *parent) const override
write configuration to xml file
Int_t fOrder
for Polynomial Kernel ( polynomial order )
Definition MethodSVM.h:150
void SetTheta(Double_t t)
Definition MethodSVM.h:111
void SetGamma(Double_t g)
Definition MethodSVM.h:107
std::vector< TMVA::SVEvent * > * fSupportVectors
contains support vectors
Definition MethodSVM.h:141
Float_t fKappa
for Sigmoidal Kernel
Definition MethodSVM.h:152
std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="Minuit") override
Optimize Tuning Parameters This is used to optimise the kernel function parameters and cost.
Float_t fGamma
RBF Kernel parameter.
Definition MethodSVM.h:138
void SetOrder(Double_t o)
Definition MethodSVM.h:110
void ProcessOptions() override
option post processing (if necessary)
UShort_t fNSubSets
nr of subsets, default 1
Definition MethodSVM.h:136
void DeclareOptions() override
declare options available for this method
std::map< TString, std::vector< Double_t > > GetTuningOptions()
GetTuningOptions Function to allow for ranges and number of steps (for scan) when optimising kernel f...
std::string fGammas
Definition MethodSVM.h:157
std::vector< Float_t > fmGamma
vector of gammas for multi-gaussian kernel
Definition MethodSVM.h:154
Float_t fCost
cost value
Definition MethodSVM.h:133
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
returns MVA value for given event
void ReadWeightsFromStream(std::istream &istr) override
std::vector< TMVA::SVEvent * > * fInputData
vector of training data in SVM format
Definition MethodSVM.h:140
const std::vector< Float_t > & GetRegressionValues() override
virtual ~MethodSVM(void)
destructor
void DeclareCompatibilityOptions() override
options that are used ONLY for the READER to ensure backward compatibility
TString fTheKernel
kernel name
Definition MethodSVM.h:148
std::string fMultiKernels
Definition MethodSVM.h:160
Float_t fTheta
for Sigmoidal Kernel
Definition MethodSVM.h:151
void SetMult(Double_t m)
Definition MethodSVM.h:113
MethodSVM(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
Definition MethodSVM.cxx:89
std::string fTune
Specify parameters to be tuned.
Definition MethodSVM.h:159
UInt_t fMaxIter
max number of iteration
Definition MethodSVM.h:135
std::vector< TMVA::SVKernelFunction::EKernelType > MakeKernelList(std::string multiKernels, TString kernel)
MakeKernelList Function providing string manipulation for product or sum of kernels functions to take...
std::string fGammaList
Definition MethodSVM.h:158
SVWorkingSet * fWgSet
svm working set
Definition MethodSVM.h:139
std::map< TString, Double_t > optimize()
Class that is the base-class for a vector of result.
Event class for Support Vector Machine.
Definition SVEvent.h:40
Kernel for Support Vector Machine.
Working class for Support Vector Machine.
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition Timer.cxx:145
void ReadTVectorDFromXML(void *node, const char *name, TVectorD *vec)
Definition Tools.cxx:1242
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:803
void WriteTVectorDToXML(void *node, const char *name, TVectorD *vec)
Definition Tools.cxx:1234
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1125
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
void * AddChild(void *parent, const char *childname, const char *content=nullptr, bool isRootNode=false)
add child node
Definition Tools.cxx:1099
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1137
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kClassification
Definition Types.h:127
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
void Print(Option_t *option="") const override
Print TNamed name and title.
Definition TNamed.cxx:127
Basic string class.
Definition TString.h:138
create variable transformations
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Double_t Exp(Double_t x)
Returns the base-e exponential function of x, which is e raised to the power x.
Definition TMath.h:720
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Definition TMath.h:767