MethodSVM.cxx
// @(#)root/tmva $Id$
// Author: Marcin Wolter, Andrzej Zemla

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodSVM                                                             *
 *                                                                                *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation                                                            *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Marcin Wolter  <Marcin.Wolter@cern.ch> - IFJ PAN, Krakow, Poland          *
 *      Andrzej Zemla  <azemla@cern.ch>        - IFJ PAN, Krakow, Poland          *
 *      (IFJ PAN: Henryk Niewodniczanski Inst. Nucl. Physics, Krakow, Poland)     *
 *                                                                                *
 * Introduction of regression by:                                                 *
 *      Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Krakow, Poland  *
 *      Kamil Kraszewski      <kalq@cern.ch>     - IFJ PAN & UJ, Krakow, Poland   *
 *      Maciej Kruk           <mkruk@cern.ch>    - IFJ PAN & AGH, Krakow, Poland  *
 *                                                                                *
 * Introduction of kernel parameter optimisation                                  *
 * and additional kernel functions by:                                            *
 *      Adrian Bevan  <adrian.bevan@cern.ch>            - Queen Mary              *
 *                                                        University of London, UK*
 *      Tom Stevenson <thomas.james.stevenson@cern.ch>  - Queen Mary              *
 *                                                        University of London, UK*
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         *
 *      MPI-K Heidelberg, Germany                                                 *
 *      PAN, Krakow, Poland                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (see tmva/doc/LICENSE)                                                         *
 **********************************************************************************/

/*! \class TMVA::MethodSVM
\ingroup TMVA
SMO Platt's SVM classifier with Keerthi & Shevade improvements
*/
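
// A minimal booking sketch (assuming the standard TMVA Factory workflow; the
// factory/dataloader objects are illustrative, while the option keys are the
// ones declared in DeclareOptions() below):
//
//    factory->BookMethod( dataloader, TMVA::Types::kSVM, "SVM",
//                         "Kernel=RBF:Gamma=0.25:C=1.0:Tol=0.001:MaxIter=1000" );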

#include "TMVA/MethodSVM.h"

#include "TMVA/Tools.h"
#include "TMVA/Timer.h"

#include "TMVA/SVWorkingSet.h"

#include "TMVA/SVEvent.h"

#include "TMVA/SVKernelFunction.h"

#include "TMVA/ClassifierFactory.h"
#include "TMVA/Configurable.h"
#include "TMVA/DataSet.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/Event.h"
#include "TMVA/IMethod.h"
#include "TMVA/MethodBase.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/Types.h"
#include "TMVA/Interval.h"
#include "TMVA/OptimizeConfigParameters.h"
#include "TMVA/Results.h"
#include "TMVA/ResultsClassification.h"
#include "TMVA/VariableInfo.h"

#include "TFile.h"
#include "TVectorD.h"
#include "TMath.h"

#include <iostream>
#include <sstream>
#include <string>

using std::vector;
using std::string;
using std::stringstream;

//const Int_t basketsize__ = 1280000;
REGISTER_METHOD(SVM)

ClassImp(TMVA::MethodSVM);

////////////////////////////////////////////////////////////////////////////////
/// standard constructor

TMVA::MethodSVM::MethodSVM( const TString& jobName, const TString& methodTitle, DataSetInfo& theData,
                            const TString& theOption )
   : MethodBase( jobName, Types::kSVM, methodTitle, theData, theOption)
   , fCost(0)
   , fTolerance(0)
   , fMaxIter(0)
   , fNSubSets(0)
   , fBparm(0)
   , fGamma(0)
   , fWgSet(0)
   , fInputData(0)
   , fSupportVectors(0)
   , fSVKernelFunction(0)
   , fMinVars(0)
   , fMaxVars(0)
   , fDoubleSigmaSquared(0)
   , fOrder(0)
   , fTheta(0)
   , fKappa(0)
   , fMult(0)
   , fNumVars(0)
   , fGammas("")
   , fGammaList("")
   , fDataSize(0)
   , fLoss(0)
{
   fVarNames.clear();
   fNumVars = theData.GetVariableInfos().size();
   for( int i = 0; i < fNumVars; i++ ){
      fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
   }
}

////////////////////////////////////////////////////////////////////////////////
/// constructor from weight file

TMVA::MethodSVM::MethodSVM( DataSetInfo& theData, const TString& theWeightFile)
   : MethodBase( Types::kSVM, theData, theWeightFile)
   , fCost(0)
   , fTolerance(0)
   , fMaxIter(0)
   , fNSubSets(0)
   , fBparm(0)
   , fGamma(0)
   , fWgSet(0)
   , fInputData(0)
   , fSupportVectors(0)
   , fSVKernelFunction(0)
   , fMinVars(0)
   , fMaxVars(0)
   , fDoubleSigmaSquared(0)
   , fOrder(0)
   , fTheta(0)
   , fKappa(0)
   , fMult(0)
   , fNumVars(0)
   , fGammas("")
   , fGammaList("")
   , fDataSize(0)
   , fLoss(0)
{
   fVarNames.clear();
   fNumVars = theData.GetVariableInfos().size();
   for( int i = 0; i < fNumVars; i++ ){
      fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
   }
}

////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::MethodSVM::~MethodSVM()
{
   fSupportVectors->clear();
   for (UInt_t i = 0; i < fInputData->size(); i++) {
      delete fInputData->at(i);
   }
   if (fWgSet != 0)            { delete fWgSet; fWgSet = 0; }
   if (fSVKernelFunction != 0) { delete fSVKernelFunction; fSVKernelFunction = 0; }
}

////////////////////////////////////////////////////////////////////////////////
/// reset the method, as if it had just been instantiated (forget all training etc.)

void TMVA::MethodSVM::Reset( void )
{
   fSupportVectors->clear();
   for (UInt_t i = 0; i < fInputData->size(); i++){
      delete fInputData->at(i);
      fInputData->at(i) = 0;
   }
   fInputData->clear();
   if (fWgSet != 0)            { fWgSet = 0; }
   if (fSVKernelFunction != 0) { fSVKernelFunction = 0; }
   if (Data()){
      Data()->DeleteResults(GetMethodName(), Types::kTraining, GetAnalysisType());
   }

   Log() << kDEBUG << " successfully(?) reset the method " << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// SVM can handle classification with 2 classes and regression with one regression-target

Bool_t TMVA::MethodSVM::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets )
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kRegression     && numberTargets == 1) return kTRUE;
   return kFALSE;
}

////////////////////////////////////////////////////////////////////////////////
/// default initialisation

void TMVA::MethodSVM::Init()
{
   // SVM always uses normalised input variables
   SetNormalised( kTRUE );

   // Helge: do not book an event vector of given size but rather fill the vector
   //        later with push_back. Anyway, this is NOT what is time consuming in
   //        SVM, and it allows us to skip entirely events with weight == 0 ;)
   fInputData      = new std::vector<TMVA::SVEvent*>(0);
   fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
}

////////////////////////////////////////////////////////////////////////////////
/// declare options available for this method

void TMVA::MethodSVM::DeclareOptions()
{
   DeclareOptionRef( fTheKernel = "RBF", "Kernel", "Pick which kernel ( RBF or MultiGauss )");
   // for gaussian kernel parameter(s)
   DeclareOptionRef( fGamma = 1., "Gamma", "RBF kernel parameter: Gamma (size of the Kernel)");
   // for polynomial kernel parameter(s)
   DeclareOptionRef( fOrder = 3, "Order", "Polynomial Kernel parameter: polynomial order");
   DeclareOptionRef( fTheta = 1., "Theta", "Polynomial Kernel parameter: polynomial theta");
   // for multi-gaussian kernel parameter(s)
   DeclareOptionRef( fGammas = "", "GammaList", "MultiGauss parameters" );

   // for range and step number for kernel parameter optimisation
   DeclareOptionRef( fTune = "All", "Tune", "Tune Parameters");
   // for list of kernels to be used with product or sum kernel
   DeclareOptionRef( fMultiKernels = "None", "KernelList", "Sum or product of kernels");
   DeclareOptionRef( fLoss = "hinge", "Loss", "Loss function");

   DeclareOptionRef( fCost, "C", "Cost parameter" );
   if (DoRegression()) {
      fCost = 0.002;
   } else {
      fCost = 1.;
   }
   DeclareOptionRef( fTolerance = 0.01, "Tol", "Tolerance parameter" );  // should be fixed
   DeclareOptionRef( fMaxIter = 1000, "MaxIter", "Maximum number of training loops" );
}
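
// A sketch of a booking option string exercising the options declared above
// (illustrative values only; the method name and variable list are the user's):
//
//    "Kernel=MultiGauss:GammaList=0.1,0.2:C=1.0:Tol=0.01:MaxIter=1000:Loss=hinge"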

////////////////////////////////////////////////////////////////////////////////
/// options that are used ONLY for the READER to ensure backward compatibility

void TMVA::MethodSVM::DeclareCompatibilityOptions()
{
   MethodBase::DeclareCompatibilityOptions();
   DeclareOptionRef( fNSubSets = 1, "NSubSets", "Number of training subsets" );
   DeclareOptionRef( fTheKernel = "Gauss", "Kernel", "Uses kernel function");
   // for gaussian kernel parameter(s)
   DeclareOptionRef( fDoubleSigmaSquared = 2., "Sigma", "Kernel parameter: sigma");
   // for polynomial kernel parameter(s)
   DeclareOptionRef( fOrder = 3, "Order", "Polynomial Kernel parameter: polynomial order");
   // for sigmoid kernel parameters
   DeclareOptionRef( fTheta = 1., "Theta", "Sigmoid Kernel parameter: theta");
   DeclareOptionRef( fKappa = 1., "Kappa", "Sigmoid Kernel parameter: kappa");
}

////////////////////////////////////////////////////////////////////////////////
/// option post processing (if necessary)

void TMVA::MethodSVM::ProcessOptions()
{
   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
            << GetMethodTypeName()
            << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
            << Endl;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Train SVM

void TMVA::MethodSVM::Train()
{
   fIPyMaxIter = fMaxIter;
   Data()->SetCurrentType(Types::kTraining);

   Log() << kDEBUG << "Create event vector" << Endl;

   fDataSize = Data()->GetNEvents();
   Int_t nSignal     = Data()->GetNEvtSigTrain();
   Int_t nBackground = Data()->GetNEvtBkgdTrain();
   Double_t CSig;
   Double_t CBkg;

   // Use the number of signal and background events from above to weight the cost
   // parameter, so that the training is not biased towards the larger dataset when
   // the signal and background samples have significantly different sizes: the
   // smaller class keeps the full cost fCost, while the larger class is scaled
   // down by the ratio of the class sizes.
   if (nSignal < nBackground){
      CSig = fCost;
      CBkg = CSig*((double)nSignal/nBackground);
   }
   else{
      CBkg = fCost;
      CSig = CBkg*((double)nBackground/nSignal);
   }
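
   // Worked example (illustrative numbers only): with fCost = 1, nSignal = 1000
   // and nBackground = 4000, the signal class keeps CSig = 1 while the larger
   // background sample is down-weighted to CBkg = 1000/4000 = 0.25, so both
   // classes carry roughly the same total cost (CSig*nSignal == CBkg*nBackground).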

   // Loop over events and assign the correct cost parameter.
   for (Int_t ievnt = 0; ievnt < Data()->GetNEvents(); ievnt++){
      if (GetEvent(ievnt)->GetWeight() != 0){
         if (DataInfo().IsSignal(GetEvent(ievnt))){
            fInputData->push_back(new SVEvent(GetEvent(ievnt), CSig, DataInfo().IsSignal(GetEvent(ievnt))));
         }
         else{
            fInputData->push_back(new SVEvent(GetEvent(ievnt), CBkg, DataInfo().IsSignal(GetEvent(ievnt))));
         }
      }
   }

   // Set the correct kernel function.
   // Here we only use valid Mercer kernels. In the literature some people have reported reasonable
   // results using the sigmoid kernel function; however, that is not a valid Mercer kernel and is
   // not used here.
   if( fTheKernel == "RBF" ){
      fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kRBF, fGamma);
   }
   else if( fTheKernel == "MultiGauss" ){
      if (fGammas != ""){
         SetMGamma(fGammas);
         fGammaList = fGammas;
      }
      else{
         if (fmGamma.size() != 0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
         else{
            for (Int_t ngammas = 0; ngammas < fNumVars; ++ngammas){
               fmGamma.push_back(1.0);
            }
            GetMGamma(fmGamma);
         }
      }
      fSVKernelFunction = new SVKernelFunction(fmGamma);
   }
   else if( fTheKernel == "Polynomial" ){
      fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kPolynomial, fOrder, fTheta);
   }
   else if( fTheKernel == "Prod" ){
      if (fGammas != ""){
         SetMGamma(fGammas);
         fGammaList = fGammas;
      }
      else{
         if (fmGamma.size() != 0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
      }
      fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kProd, MakeKernelList(fMultiKernels,fTheKernel), fmGamma, fGamma, fOrder, fTheta );
   }
   else if( fTheKernel == "Sum" ){
      if (fGammas != ""){
         SetMGamma(fGammas);
         fGammaList = fGammas;
      }
      else{
         if (fmGamma.size() != 0){ GetMGamma(fmGamma); } // Set fGammas if empty to write to XML file
      }
      fSVKernelFunction = new SVKernelFunction( SVKernelFunction::kSum, MakeKernelList(fMultiKernels,fTheKernel), fmGamma, fGamma, fOrder, fTheta );
   }
   else {
      Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
      exit(1);
   }

   Log() << kINFO << "Building SVM Working Set...with " << fInputData->size() << " event instances" << Endl;
   Timer bldwstime( GetName());
   fWgSet = new SVWorkingSet( fInputData, fSVKernelFunction, fTolerance, DoRegression() );
   Log() << kINFO << "Elapsed time for Working Set build: " << bldwstime.GetElapsedTime() << Endl;

   // timing
   Timer timer( GetName() );
   Log() << kINFO << "Sorry, no computing time forecast available for SVM, please wait ..." << Endl;

   if (fInteractive) fWgSet->SetIPythonInteractive(&fExitFromTraining, &fIPyCurrentIter);

   fWgSet->Train(fMaxIter);

   Log() << kINFO << "Elapsed time: " << timer.GetElapsedTime() << " " << Endl;

   fBparm          = fWgSet->GetBpar();
   fSupportVectors = fWgSet->GetSupportVectors();
   delete fWgSet;
   fWgSet = 0;

   if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
   ExitFromTraining();
}

////////////////////////////////////////////////////////////////////////////////
/// write configuration to xml file
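///
/// A sketch of the layout this produces (element and attribute names as written
/// by the code below):
///
///     <Weights fBparm="..." fGamma="..." fGammaList="..." fTheta="..." fOrder="..." NSupVec="...">
///       <SupportVector> Ns typeFlag alpha alpha_p x_0 ... x_{Nvar-1} </SupportVector>   (NSupVec entries)
///       <Maxima Var0="..." ... />
///       <Minima Var0="..." ... />
///     </Weights>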

void TMVA::MethodSVM::AddWeightsXMLTo( void* parent ) const
{
   void* wght = gTools().AddChild(parent, "Weights");
   gTools().AddAttr(wght,"fBparm",fBparm);
   gTools().AddAttr(wght,"fGamma",fGamma);
   gTools().AddAttr(wght,"fGammaList",fGammaList);
   gTools().AddAttr(wght,"fTheta",fTheta);
   gTools().AddAttr(wght,"fOrder",fOrder);
   gTools().AddAttr(wght,"NSupVec",fSupportVectors->size());

   for (std::vector<TMVA::SVEvent*>::iterator veciter = fSupportVectors->begin();
        veciter != fSupportVectors->end(); ++veciter ) {
      TVectorD temp(GetNvar()+4);
      temp[0] = (*veciter)->GetNs();
      temp[1] = (*veciter)->GetTypeFlag();
      temp[2] = (*veciter)->GetAlpha();
      temp[3] = (*veciter)->GetAlpha_p();
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
         temp[ivar+4] = (*(*veciter)->GetDataVector())[ivar];
      gTools().WriteTVectorDToXML(wght,"SupportVector",&temp);
   }
   // write max/min data values
   void* maxnode = gTools().AddChild(wght, "Maxima");
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().AddAttr(maxnode, "Var"+gTools().StringFromInt(ivar), GetXmax(ivar));
   void* minnode = gTools().AddChild(wght, "Minima");
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().AddAttr(minnode, "Var"+gTools().StringFromInt(ivar), GetXmin(ivar));
}

////////////////////////////////////////////////////////////////////////////////
/// read configuration from xml file

void TMVA::MethodSVM::ReadWeightsFromXML( void* wghtnode )
{
   gTools().ReadAttr( wghtnode, "fBparm",     fBparm );
   gTools().ReadAttr( wghtnode, "fGamma",     fGamma );
   gTools().ReadAttr( wghtnode, "fGammaList", fGammaList );
   gTools().ReadAttr( wghtnode, "fOrder",     fOrder );
   gTools().ReadAttr( wghtnode, "fTheta",     fTheta );
   UInt_t fNsupv = 0;
   gTools().ReadAttr( wghtnode, "NSupVec", fNsupv );

   Float_t alpha   = 0.;
   Float_t alpha_p = 0.;

   Int_t typeFlag = -1;
   // UInt_t ns = 0;
   std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());

   if (fMaxVars != 0) delete fMaxVars;
   fMaxVars = new TVectorD( GetNvar() );
   if (fMinVars != 0) delete fMinVars;
   fMinVars = new TVectorD( GetNvar() );
   if (fSupportVectors != 0) {
      for (vector< SVEvent* >::iterator it = fSupportVectors->begin(); it != fSupportVectors->end(); ++it)
         delete *it;
      delete fSupportVectors;
   }
   fSupportVectors = new std::vector<TMVA::SVEvent*>(0);
   void* supportvectornode = gTools().GetChild(wghtnode);
   for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
      TVectorD temp(GetNvar()+4);
      gTools().ReadTVectorDFromXML(supportvectornode,"SupportVector",&temp);
      // ns = (UInt_t)temp[0];
      typeFlag = (int)temp[1];
      alpha    = temp[2];
      alpha_p  = temp[3];
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) (*svector)[ivar] = temp[ivar+4];

      fSupportVectors->push_back(new SVEvent(svector,alpha,alpha_p,typeFlag));
      supportvectornode = gTools().GetNextChild(supportvectornode);
   }

   void* maxminnode = supportvectornode;
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().ReadAttr( maxminnode, "Var"+gTools().StringFromInt(ivar), (*fMaxVars)[ivar]);
   maxminnode = gTools().GetNextChild(maxminnode);
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      gTools().ReadAttr( maxminnode, "Var"+gTools().StringFromInt(ivar), (*fMinVars)[ivar]);
   if (fSVKernelFunction != 0) delete fSVKernelFunction;
   if( fTheKernel == "RBF" ){
      fSVKernelFunction = new SVKernelFunction(SVKernelFunction::kRBF, fGamma);
   }
   else if( fTheKernel == "MultiGauss" ){
      SetMGamma(fGammaList);
      fSVKernelFunction = new SVKernelFunction(fmGamma);
   }
   else if( fTheKernel == "Polynomial" ){
      fSVKernelFunction = new SVKernelFunction(SVKernelFunction::kPolynomial, fOrder, fTheta);
   }
   else if( fTheKernel == "Prod" ){
      SetMGamma(fGammaList);
      fSVKernelFunction = new SVKernelFunction(SVKernelFunction::kProd, MakeKernelList(fMultiKernels,fTheKernel), fmGamma, fGamma, fOrder, fTheta);
   }
   else if( fTheKernel == "Sum" ){
      SetMGamma(fGammaList);
      fSVKernelFunction = new SVKernelFunction(SVKernelFunction::kSum, MakeKernelList(fMultiKernels,fTheKernel), fmGamma, fGamma, fOrder, fTheta);
   }
   else {
      Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
      exit(1);
   }
   delete svector;
}

////////////////////////////////////////////////////////////////////////////////
/// TODO write IT
/// write training sample (TTree) to file

void TMVA::MethodSVM::WriteWeightsToStream( TFile& ) const
{
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodSVM::ReadWeightsFromStream( std::istream& istr )
{
   if (fSupportVectors != 0) { delete fSupportVectors; fSupportVectors = 0; }
   fSupportVectors = new std::vector<TMVA::SVEvent*>(0);

   // read configuration from input stream
   istr >> fBparm;

   UInt_t fNsupv;
   // coverity[tainted_data_argument]
   istr >> fNsupv;
   fSupportVectors->reserve(fNsupv);

   Float_t typeTalpha = 0.;
   Float_t alpha      = 0.;
   Int_t typeFlag     = -1;
   UInt_t ns          = 0;
   std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());

   fMaxVars = new TVectorD( GetNvar() );
   fMinVars = new TVectorD( GetNvar() );

   for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
      istr >> ns;
      istr >> typeTalpha;
      typeFlag = typeTalpha < 0 ? -1 : 1;
      alpha    = typeTalpha < 0 ? -typeTalpha : typeTalpha;
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> svector->at(ivar);

      fSupportVectors->push_back(new SVEvent(svector,alpha,typeFlag,ns));
   }

   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMaxVars)[ivar];

   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMinVars)[ivar];

   delete fSVKernelFunction;
   if (fTheKernel == "Gauss" ) {
      fSVKernelFunction = new SVKernelFunction(1/fDoubleSigmaSquared);
   }
   else {
      SVKernelFunction::EKernelType k = SVKernelFunction::kLinear;
      if      (fTheKernel == "Linear")     k = SVKernelFunction::kLinear;
      else if (fTheKernel == "Polynomial") k = SVKernelFunction::kPolynomial;
      else if (fTheKernel == "Sigmoid" )   k = SVKernelFunction::kSigmoidal;
      else {
         Log() << kFATAL << "Unknown kernel function found in weight file!" << Endl;
      }
      fSVKernelFunction = new SVKernelFunction();
      fSVKernelFunction->setCompatibilityParams(k, fOrder, fTheta, fKappa);
   }
   delete svector;
}

////////////////////////////////////////////////////////////////////////////////
/// TODO write IT

void TMVA::MethodSVM::ReadWeightsFromStream( TFile& )
{
}

////////////////////////////////////////////////////////////////////////////////
/// returns MVA value for given event
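///
/// A sketch of what the code below computes (b is fBparm, t_i the type flag
/// of +-1, alpha_i the Lagrange multiplier, K the configured kernel):
///
///     d(x)   = sum_i alpha_i * t_i * K(x_i, x) - b
///     output = 1 / (1 + exp(d(x)))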

Double_t TMVA::MethodSVM::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   Double_t myMVA = 0;

   // TODO: avoid creation of a new SVEvent every time (Joerg)
   SVEvent* ev = new SVEvent( GetEvent(), 0. ); // check for specificators

   for (UInt_t ievt = 0; ievt < fSupportVectors->size(); ievt++) {
      myMVA += ( fSupportVectors->at(ievt)->GetAlpha()
                 * fSupportVectors->at(ievt)->GetTypeFlag()
                 * fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
   }

   delete ev;

   myMVA -= fBparm;

   // cannot determine error
   NoErrorCalc(err, errUpper);

   // 08/12/09: changed sign here to make results agree with convention signal=1
   return 1.0/(1.0 + TMath::Exp(myMVA));
}

////////////////////////////////////////////////////////////////////////////////
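/// returns the regression value: f(x) = sum_i deltaAlpha_i * K(x_i, x) + b,
/// inverse-transformed back to the original target range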

const std::vector<Float_t>& TMVA::MethodSVM::GetRegressionValues()
{
   if( fRegressionReturnVal == NULL )
      fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();

   Double_t myMVA = 0;

   const Event *baseev = GetEvent();
   SVEvent* ev = new SVEvent( baseev, 0. ); // check for specificators

   for (UInt_t ievt = 0; ievt < fSupportVectors->size(); ievt++) {
      myMVA += ( fSupportVectors->at(ievt)->GetDeltaAlpha()
                 * fSVKernelFunction->Evaluate( fSupportVectors->at(ievt), ev ) );
   }
   myMVA += fBparm;
   Event* evT = new Event(*baseev);
   evT->SetTarget(0, myMVA);

   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );

   fRegressionReturnVal->push_back(evT2->GetTarget(0));

   delete evT;
   delete ev;

   return *fRegressionReturnVal;
}

////////////////////////////////////////////////////////////////////////////////
/// write specific classifier response

void TMVA::MethodSVM::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   const int fNsupv = fSupportVectors->size();
   fout << "   // not implemented for class: \"" << className << "\"" << std::endl;
   fout << "   float        fBparameter;" << std::endl;
   fout << "   int          fNOfSuppVec;" << std::endl;
   fout << "   static float fAllSuppVectors[][" << fNsupv << "];" << std::endl;
   fout << "   static float fAlphaTypeCoef[" << fNsupv << "];" << std::endl;
   fout << std::endl;
   fout << "   // Kernel parameter(s) " << std::endl;
   fout << "   float fGamma;" << std::endl;
   fout << "};" << std::endl;
   fout << "" << std::endl;

   // Initialize function definition
   fout << "inline void " << className << "::Initialize() " << std::endl;
   fout << "{" << std::endl;
   fout << "   fBparameter = " << fBparm << ";" << std::endl;
   fout << "   fNOfSuppVec = " << fNsupv << ";" << std::endl;
   fout << "   fGamma = " << fGamma << ";" << std::endl;
   fout << "}" << std::endl;
   fout << std::endl;

   // GetMvaValue__ function definition
   fout << "inline double " << className << "::GetMvaValue__(const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   double mvaval = 0; " << std::endl;
   fout << "   double temp = 0; " << std::endl;
   fout << std::endl;
   fout << "   for (int ievt = 0; ievt < fNOfSuppVec; ievt++ ){" << std::endl;
   fout << "      temp = 0;" << std::endl;
   fout << "      for ( unsigned int ivar = 0; ivar < GetNvar(); ivar++ ) {" << std::endl;
   fout << "         temp += (fAllSuppVectors[ivar][ievt] - inputValues[ivar])  " << std::endl;
   fout << "               * (fAllSuppVectors[ivar][ievt] - inputValues[ivar]); " << std::endl;
   fout << "      }" << std::endl;
   fout << "      mvaval += fAlphaTypeCoef[ievt] * exp( -fGamma * temp ); " << std::endl;
   fout << "   }" << std::endl;
   fout << "   mvaval -= fBparameter;" << std::endl;
   fout << "   return 1./(1. + exp(mvaval));" << std::endl;
   fout << "}" << std::endl;
   fout << "// Clean up" << std::endl;
   fout << "inline void " << className << "::Clear() " << std::endl;
   fout << "{" << std::endl;
   fout << "   // nothing to clear " << std::endl;
   fout << "}" << std::endl;
   fout << "" << std::endl;

   // define support vectors
   fout << "float " << className << "::fAlphaTypeCoef[] =" << std::endl;
   fout << "{ ";
   for (Int_t isv = 0; isv < fNsupv; isv++) {
      fout << fSupportVectors->at(isv)->GetDeltaAlpha() * fSupportVectors->at(isv)->GetTypeFlag();
      if (isv < fNsupv-1) fout << ", ";
   }
   fout << " };" << std::endl << std::endl;

   fout << "float " << className << "::fAllSuppVectors[][" << fNsupv << "] =" << std::endl;
   fout << "{";
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
      fout << std::endl;
      fout << "   { ";
      for (Int_t isv = 0; isv < fNsupv; isv++){
         fout << fSupportVectors->at(isv)->GetDataVector()->at(ivar);
         if (isv < fNsupv-1) fout << ", ";
      }
      fout << " }";
      if (ivar < GetNvar()-1) fout << ", " << std::endl;
      else                    fout << std::endl;
   }
   fout << "};" << std::endl << std::endl;
}

////////////////////////////////////////////////////////////////////////////////
/// get help message text
///
/// typical length of text line:
///         "|--------------------------------------------------------------|"

void TMVA::MethodSVM::GetHelpMessage() const
{
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "The Support Vector Machine (SVM) builds a hyperplane separating" << Endl;
   Log() << "signal and background events (vectors) using the minimal subset of " << Endl;
   Log() << "all vectors used for training (support vectors). The extension to" << Endl;
   Log() << "the non-linear case is performed by mapping input vectors into a " << Endl;
   Log() << "higher-dimensional feature space in which linear separation is " << Endl;
   Log() << "possible. The use of the kernel functions thereby eliminates the " << Endl;
   Log() << "explicit transformation to the feature space. The implemented SVM " << Endl;
   Log() << "algorithm performs the classification tasks using linear, polynomial, " << Endl;
   Log() << "Gaussian and sigmoidal kernel functions. The Gaussian kernel makes " << Endl;
   Log() << "it possible to model any discriminant shape in the input space." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "SVM is a general-purpose non-linear classification method which " << Endl;
   Log() << "does not require data preprocessing like decorrelation or Principal " << Endl;
   Log() << "Component Analysis. It generalises quite well and can handle analyses " << Endl;
   Log() << "with large numbers of input variables." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "Optimal performance requires primarily a proper choice of the kernel " << Endl;
   Log() << "parameters (the width \"Sigma\" in case of the Gaussian kernel) and the" << Endl;
   Log() << "cost parameter \"C\". The user must optimise them empirically by running" << Endl;
   Log() << "SVM several times with different parameter sets. The time needed for " << Endl;
   Log() << "each evaluation scales like the square of the number of training " << Endl;
   Log() << "events, so a coarse preliminary tuning should be performed on " << Endl;
   Log() << "reduced data sets." << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// Optimize Tuning Parameters.
/// This is used to optimise the kernel function parameters and the cost. All kernel
/// parameters are optimised by default with default ranges; however, the parameters
/// to be optimised can be set when booking the method with the option Tune.
///
/// Example:
///
///     "Tune=Gamma[0.01;1.0;100]" would only tune the RBF Gamma between 0.01 and 1.0
///     with 100 steps.
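///
/// A usage sketch (assuming the standard Factory workflow; the object names are
/// illustrative): tuning is typically triggered after booking via
///
///     factory->OptimizeAllMethods("ROCIntegral", "Scan");
///
/// which ends up calling this method with the chosen figure of merit and fit type.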

std::map<TString,Double_t> TMVA::MethodSVM::OptimizeTuningParameters(TString fomType, TString fitType)
{
   // Call the Optimizer with the set of kernel parameters and ranges that are meant to be tuned.
   std::map< TString,std::vector<Double_t> > optVars;
   // Get parameters and options specified in booking of method.
   if (fTune != "All"){
      optVars = GetTuningOptions();
   }
   std::map< TString,std::vector<Double_t> >::iterator iter;
   // Fill all the tuning parameters that should be optimized into a map
   std::map<TString,TMVA::Interval*> tuneParameters;
   std::map<TString,Double_t> tunedParameters;
   // Note: the 3rd parameter in the interval is the "number of bins", NOT the stepsize!!
   // The actual values are always read from the middle of the bins.
   Log() << kINFO << "Using the " << fTheKernel << " kernel." << Endl;
   // Setup map of parameters based on the specified options or defaults.
   if( fTheKernel == "RBF" ){
      if (fTune == "All"){
         tuneParameters.insert(std::pair<TString,Interval*>("Gamma", new Interval(0.01,1.,100)));
         tuneParameters.insert(std::pair<TString,Interval*>("C",     new Interval(0.01,1.,100)));
      }
      else{
         for(iter = optVars.begin(); iter != optVars.end(); ++iter){
            if( iter->first == "Gamma" || iter->first == "C"){
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
            }
            else{
               Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
               exit(1);
            }
         }
      }
   }
   else if( fTheKernel == "Polynomial" ){
      if (fTune == "All"){
         tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
         tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.01,1.,100)));
         tuneParameters.insert(std::pair<TString,Interval*>("C",     new Interval(0.01,1.,100)));
      }
      else{
         for(iter = optVars.begin(); iter != optVars.end(); ++iter){
            if( iter->first == "Theta" || iter->first == "C"){
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
            }
            else if( iter->first == "Order"){
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
            }
            else{
               Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
               exit(1);
            }
         }
      }
   }
   else if( fTheKernel == "MultiGauss" ){
      if (fTune == "All"){
         for(int i = 0; i < fNumVars; i++){
            stringstream s;
            s << fVarNames.at(i);
            string str = "Gamma_" + s.str();
            tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(0.01,1.,100)));
         }
         tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
      } else {
         for(iter = optVars.begin(); iter != optVars.end(); ++iter){
            if( iter->first == "GammaList"){
               for(int j = 0; j < fNumVars; j++){
                  stringstream s;
                  s << fVarNames.at(j);
                  string str = "Gamma_" + s.str();
                  tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
               }
            }
            else if( iter->first == "C"){
               tuneParameters.insert(std::pair<TString,Interval*>(iter->first, new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
            }
            else{
               Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
               exit(1);
            }
         }
      }
   }
   else if( fTheKernel == "Prod" ){
      std::stringstream tempstring(fMultiKernels);
      std::string value;
      while (std::getline(tempstring,value,'*')){
         if (value == "RBF"){
            tuneParameters.insert(std::pair<TString,Interval*>("Gamma", new Interval(0.01,1.,100)));
         }
         else if (value == "MultiGauss"){
            for(int i = 0; i < fNumVars; i++){
               stringstream s;
               s << fVarNames.at(i);
               string str = "Gamma_" + s.str();
               tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(0.01,1.,100)));
            }
         }
         else if (value == "Polynomial"){
            tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
            tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.0,1.0,101)));
         }
         else {
            Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
            exit(1);
         }
      }
      tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
   }
   else if( fTheKernel == "Sum" ){
      std::stringstream tempstring(fMultiKernels);
      std::string value;
      while (std::getline(tempstring,value,'+')){
         if (value == "RBF"){
            tuneParameters.insert(std::pair<TString,Interval*>("Gamma", new Interval(0.01,1.,100)));
         }
         else if (value == "MultiGauss"){
            for(int i = 0; i < fNumVars; i++){
               stringstream s;
               s << fVarNames.at(i);
               string str = "Gamma_" + s.str();
               tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(0.01,1.,100)));
            }
         }
         else if (value == "Polynomial"){
            tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
            tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.0,1.0,101)));
         }
         else {
            Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
            exit(1);
         }
      }
      tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
   }
   else {
      Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
      exit(1);
   }
   Log() << kINFO << " the following SVM parameters will be tuned on the respective *grid*\n" << Endl;
   std::map<TString,TMVA::Interval*>::iterator it;
   for(it = tuneParameters.begin(); it != tuneParameters.end(); ++it){
      Log() << kWARNING << it->first << Endl;
      std::ostringstream oss;
      (it->second)->Print(oss);
      Log() << oss.str();
      Log() << Endl;
   }
   OptimizeConfigParameters optimize(this, tuneParameters, fomType, fitType);
   tunedParameters = optimize.optimize();

   return tunedParameters;
}

////////////////////////////////////////////////////////////////////////////////
/// Set the tuning parameters according to the argument

void TMVA::MethodSVM::SetTuneParameters(std::map<TString,Double_t> tuneParameters)
{
   std::map<TString,Double_t>::iterator it;
   if( fTheKernel == "RBF" ){
      for(it = tuneParameters.begin(); it != tuneParameters.end(); ++it){
         Log() << kWARNING << it->first << " = " << it->second << Endl;
         if (it->first == "Gamma"){
            SetGamma(it->second);
         }
         else if (it->first == "C"){
            SetCost(it->second);
         }
         else {
            Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
         }
      }
   }
   else if( fTheKernel == "MultiGauss" ){
      fmGamma.clear();
      for(int i = 0; i < fNumVars; i++){
         stringstream s;
         s << fVarNames.at(i);
         string str = "Gamma_" + s.str();
         Log() << kWARNING << tuneParameters.find(str)->first << " = " << tuneParameters.find(str)->second << Endl;
         fmGamma.push_back(tuneParameters.find(str)->second);
      }
      for(it = tuneParameters.begin(); it != tuneParameters.end(); ++it){
         if (it->first == "C"){
            Log() << kWARNING << it->first << " = " << it->second << Endl;
            SetCost(it->second);
            break;
         }
      }
   }
   else if( fTheKernel == "Polynomial" ){
      for(it = tuneParameters.begin(); it != tuneParameters.end(); ++it){
         Log() << kWARNING << it->first << " = " << it->second << Endl;
         if (it->first == "Order"){
            SetOrder(it->second);
         }
         else if (it->first == "Theta"){
            SetTheta(it->second);
         }
         else if (it->first == "C"){
            SetCost(it->second);
         }
         else if (it->first == "Mult"){
            SetMult(it->second);
         }
         else{
            Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
         }
      }
   }
   else if( fTheKernel == "Prod" || fTheKernel == "Sum"){
      fmGamma.clear();
      for(it = tuneParameters.begin(); it != tuneParameters.end(); ++it){
         bool foundParam = false;
         Log() << kWARNING << it->first << " = " << it->second << Endl;
         for(int i = 0; i < fNumVars; i++){
            stringstream s;
            s << fVarNames.at(i);
            string str = "Gamma_" + s.str();
            if (it->first == str){
               fmGamma.push_back(it->second);
               foundParam = true;
            }
         }
         if (it->first == "Gamma"){
            SetGamma(it->second);
            foundParam = true;
         }
         else if (it->first == "Order"){
            SetOrder(it->second);
            foundParam = true;
         }
         else if (it->first == "Theta"){
            SetTheta(it->second);
            foundParam = true;
         }
         else if (it->first == "C"){
            SetCost(it->second);
            foundParam = true;
         }
         else{
            if (!foundParam){
               Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
            }
         }
      }
   }
   else {
      Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
      exit(1);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Takes as input a string of values for multigaussian gammas and splits it, filling the
/// gamma vector required by the SVKernelFunction. Example: "GammaList=0.1,0.2,0.3" would
/// make a vector with Gammas of 0.1, 0.2 & 0.3 corresponding to input variables 1, 2 & 3
/// respectively.

void TMVA::MethodSVM::SetMGamma(std::string & mg)
{
   std::stringstream tempstring(mg);
   Float_t value;
   while (tempstring >> value){
      fmGamma.push_back(value);
      if (tempstring.peek() == ','){
         tempstring.ignore();
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Produces the GammaList string for the multigaussian kernel to be written to the XML file
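/// (for example, gammas {0.1, 0.2, 0.3} yield fGammaList = "0.1,0.2,0.3").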
void TMVA::MethodSVM::GetMGamma(const std::vector<float> & gammas)
{
   std::ostringstream tempstring;
   for (UInt_t i = 0; i < gammas.size(); ++i){
      tempstring << gammas.at(i);
      if (i != (gammas.size()-1)){
         tempstring << ",";
      }
   }
   fGammaList = tempstring.str();
}

////////////////////////////////////////////////////////////////////////////////
/// MakeKernelList
/// Function providing string manipulation for the product or sum of kernel functions:
/// it takes the list of kernels specified in the booking of the method and provides a
/// vector of SV kernels to iterate over in SVKernelFunction.
///
/// Example:
///
///     "KernelList=RBF*Polynomial" would use a product of the RBF and Polynomial
///     kernels.

std::vector<TMVA::SVKernelFunction::EKernelType> TMVA::MethodSVM::MakeKernelList(std::string multiKernels, TString kernel)
{
   std::vector<TMVA::SVKernelFunction::EKernelType> kernelsList;
   std::stringstream tempstring(multiKernels);
   std::string value;
   if (kernel == "Prod"){
      while (std::getline(tempstring,value,'*')){
         if (value == "RBF"){ kernelsList.push_back(SVKernelFunction::kRBF); }
         else if (value == "MultiGauss"){
            kernelsList.push_back(SVKernelFunction::kMultiGauss);
            if (fGammas != ""){
               SetMGamma(fGammas);
            }
         }
         else if (value == "Polynomial"){ kernelsList.push_back(SVKernelFunction::kPolynomial); }
         else {
            Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
            exit(1);
         }
      }
   }
   else if (kernel == "Sum"){
      while (std::getline(tempstring,value,'+')){
         if (value == "RBF"){ kernelsList.push_back(SVKernelFunction::kRBF); }
         else if (value == "MultiGauss"){
            kernelsList.push_back(SVKernelFunction::kMultiGauss);
            if (fGammas != ""){
               SetMGamma(fGammas);
            }
         }
         else if (value == "Polynomial"){ kernelsList.push_back(SVKernelFunction::kPolynomial); }
         else {
            Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
            exit(1);
         }
      }
   }
   else {
      Log() << kWARNING << "Unable to split MultiKernels. Delimiters */+ required." << Endl;
      exit(1);
   }
   return kernelsList;
}

////////////////////////////////////////////////////////////////////////////////
/// GetTuningOptions
/// Function to allow for ranges and number of steps (for scan) when optimising kernel
/// function parameters. Specified when booking the method, after the parameter to be
/// optimised, between square brackets with each value separated by ';': the first value
/// is the lower limit, the second the upper limit and the third is the number of steps.
/// Example: "Tune=Gamma[0.01;1.0;100]" would only tune the RBF Gamma between 0.01 and
/// 1.0 with 100 steps.
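/// Several parameters can be combined with commas, e.g.
/// "Tune=Gamma[0.01;1.0;100],C[0.01;1.0;100]" (the parser below splits on ','
/// first, then on ';' inside the brackets).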

std::map< TString,std::vector<Double_t> > TMVA::MethodSVM::GetTuningOptions()
{
   std::map< TString,std::vector<Double_t> > optVars;
   std::stringstream tempstring(fTune);
   std::string value;
   while (std::getline(tempstring,value,',')){
      unsigned first = value.find('[')+1;
      unsigned last  = value.find_last_of(']');
      std::string optParam = value.substr(0,first-1);
      std::stringstream strNew (value.substr(first,last-first));
      Double_t optInterval;
      std::vector<Double_t> tempVec;
      UInt_t i = 0;
      while (strNew >> optInterval){
         tempVec.push_back(optInterval);
         if (strNew.peek() == ';'){
            strNew.ignore();
         }
         ++i;
      }
      if (i != 3 && i == tempVec.size()){
         if (optParam == "C" || optParam == "Gamma" || optParam == "GammaList" || optParam == "Theta"){
            switch(i){ // intentional fall-through: fill the missing defaults
            case 0:
               tempVec.push_back(0.01);
            case 1:
               tempVec.push_back(1.);
            case 2:
               tempVec.push_back(100);
            }
         }
         else if (optParam == "Order"){
            switch(i){ // intentional fall-through: fill the missing defaults
            case 0:
               tempVec.push_back(1);
            case 1:
               tempVec.push_back(10);
            case 2:
               tempVec.push_back(10);
            }
         }
         else{
            Log() << kWARNING << optParam << " is not a recognised tuneable parameter." << Endl;
            exit(1);
         }
      }
      optVars.insert(std::pair<TString,std::vector<Double_t> >(optParam,tempVec));
   }
   return optVars;
}

////////////////////////////////////////////////////////////////////////////////
/// getLoss
/// Calculates the loss for the testing dataset. The loss function can be specified when
/// booking the method, otherwise it defaults to hinge loss. Currently not used, however
/// it is accessible if required.
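///
/// The per-event terms implemented below (v = classifier output, w = event weight),
/// written here for a signal event; background events use (1-v) in place of v:
///
///     hinge:    w * (1 - v)
///     exp:      w * exp(-v)
///     binomial: w * log(1 + exp(-2v))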

Double_t TMVA::MethodSVM::getLoss( TString lossFunction )
{
   Double_t loss = 0.0;
   Double_t sumW = 0.0;
   Double_t temp = 0.0;
   Data()->SetCurrentType(Types::kTesting);
   ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*> ( Data()->GetResults(GetMethodName(), Types::kTesting, Types::kClassification) );
   for (Long64_t ievt = 0; ievt < GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      Float_t v = (*mvaRes)[ievt][0];
      Float_t w = ev->GetWeight();
      if (DataInfo().IsSignal(ev)){
         if (lossFunction == "hinge"){
            temp += w*(1-v);
         }
         else if (lossFunction == "exp"){
            temp += w*TMath::Exp(-v);
         }
         else if (lossFunction == "binomial"){
            temp += w*TMath::Log(1+TMath::Exp(-2*v));
         }
         else{
            Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
            exit(1);
         }
      }
      else{
         if (lossFunction == "hinge"){
            temp += w*v;
         }
         else if (lossFunction == "exp"){
            temp += w*TMath::Exp(-(1-v));
         }
         else if (lossFunction == "binomial"){
            temp += w*TMath::Log(1+TMath::Exp(-2*(1-v)));
         }
         else{
            Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
            exit(1);
         }
      }
      sumW += w;
   }
   loss = temp/sumW;

   return loss;
}