using std::stringstream;
fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
Log() << kDEBUG << " successfully(?) reset the method " << Endl;
fInputData = new std::vector<TMVA::SVEvent*>(0);
Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
      << GetMethodTypeName()
      << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string." << Endl;
Log() << kDEBUG << "Create event vector" << Endl;
Int_t nSignal     = Data()->GetNEvtSigTrain();
Int_t nBackground = Data()->GetNEvtBkgdTrain();
if (nSignal < nBackground) {
   CBkg = CSig*((double)nSignal/nBackground);
}
else {
   CSig = CBkg*((double)nSignal/nBackground);
}
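// Worked example of the rebalancing above (illustrative numbers, not from
// the source): with nSignal = 1000 and nBackground = 4000 the background
// cost becomes CBkg = CSig * (1000./4000.) = 0.25 * CSig, so the majority
// class is trained with a proportionally smaller misclassification cost.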
for (Int_t ievnt = 0; ievnt < Data()->GetNEvents(); ievnt++) {
   if (GetEvent(ievnt)->GetWeight() != 0) {
      // (body elided in this excerpt: each event with nonzero weight is
      // converted to an SVEvent and appended to fInputData)
   }
}
Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
Log() << kINFO << "Building SVM Working Set...with " << fInputData->size() << " event instances" << Endl;
Log() << kINFO << "Sorry, no computing time forecast available for SVM, please wait ..." << Endl;
for (std::vector<TMVA::SVEvent*>::iterator veciter = fSupportVectors->begin();
     veciter != fSupportVectors->end(); ++veciter) {
   temp[0] = (*veciter)->GetNs();
   temp[1] = (*veciter)->GetTypeFlag();
   temp[2] = (*veciter)->GetAlpha();
   temp[3] = (*veciter)->GetAlpha_p();
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++)
      temp[ivar+4] = (*(*veciter)->GetDataVector())[ivar];
   // (the filled record is attached to the XML weight node)
}
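// Layout note: each support vector is serialised as one fixed-width record,
// temp[0] = event index (Ns), temp[1] = class flag, temp[2] = alpha,
// temp[3] = alpha_p, followed by the GetNvar() input-variable values at
// offsets 4 .. GetNvar()+3; the XML reader below unpacks the same layout.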
std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());
for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
   // (the record for this support vector is read from the XML node into temp)
   typeFlag = (int)temp[1];
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) (*svector)[ivar] = temp[ivar+4];
   // (an SVEvent is constructed from temp and svector and stored)
}
void* maxminnode = supportvectornode;
Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
std::vector<Float_t>* svector = new std::vector<Float_t>(GetNvar());
for (UInt_t ievt = 0; ievt < fNsupv; ievt++) {
   // (ns and typeTalpha are read from the stream here)
   typeFlag = typeTalpha < 0 ? -1 : 1;
   alpha    = typeTalpha < 0 ? -typeTalpha : typeTalpha;
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> svector->at(ivar);
   // (an SVEvent is constructed from these values and stored)
}
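// Note: the stream stores a single number typeTalpha = typeFlag * alpha per
// support vector; its sign recovers the class flag (conventionally +1 for
// signal, -1 for background) and its magnitude the Lagrange multiplier alpha.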
Log() << kFATAL << "Unknown kernel function found in weight file!" << Endl;
fout << "   // not implemented for class: \"" << className << "\"" << std::endl;
fout << "   float fBparameter;" << std::endl;
fout << "   int fNOfSuppVec;" << std::endl;
fout << "   static float fAllSuppVectors[][" << fNsupv << "];" << std::endl;
fout << "   static float fAlphaTypeCoef[" << fNsupv << "];" << std::endl;
fout << "   // Kernel parameter(s) " << std::endl;
fout << "   float fGamma;" << std::endl;
fout << "};" << std::endl;
fout << "" << std::endl;
fout << "inline void " << className << "::Initialize() " << std::endl;
fout << "{" << std::endl;
fout << "   fBparameter = " << fBparm << ";" << std::endl;
fout << "   fNOfSuppVec = " << fNsupv << ";" << std::endl;
fout << "   fGamma = " << fGamma << ";" << std::endl;
fout << "}" << std::endl;
fout << "inline double " << className << "::GetMvaValue__(const std::vector<double>& inputValues ) const" << std::endl;
fout << "{" << std::endl;
fout << "   double mvaval = 0; " << std::endl;
fout << "   double temp = 0; " << std::endl;
fout << "   for (int ievt = 0; ievt < fNOfSuppVec; ievt++ ){" << std::endl;
fout << "      temp = 0;" << std::endl;
fout << "      for ( unsigned int ivar = 0; ivar < GetNvar(); ivar++ ) {" << std::endl;
fout << "         temp += (fAllSuppVectors[ivar][ievt] - inputValues[ivar]) " << std::endl;
fout << "                 * (fAllSuppVectors[ivar][ievt] - inputValues[ivar]); " << std::endl;
fout << "      }" << std::endl;
fout << "      mvaval += fAlphaTypeCoef[ievt] * exp( -fGamma * temp ); " << std::endl;
fout << "   }" << std::endl;
fout << "   mvaval -= fBparameter;" << std::endl;
fout << "   return 1./(1. + exp(mvaval));" << std::endl;
fout << "}" << std::endl;
fout << "// Clean up" << std::endl;
fout << "inline void " << className << "::Clear() " << std::endl;
fout << "{" << std::endl;
fout << "   // nothing to clear " << std::endl;
fout << "}" << std::endl;
fout << "" << std::endl;
fout << "float " << className << "::fAlphaTypeCoef[] =" << std::endl;
fout << "{ ";
for (Int_t isv = 0; isv < fNsupv; isv++) {
   // (the coefficient value for this support vector is streamed here)
   if (isv < fNsupv-1) fout << ", ";
}
fout << " };" << std::endl << std::endl;
fout << "float " << className << "::fAllSuppVectors[][" << fNsupv << "] =" << std::endl;
for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
   // (one row of the array is opened here in the full source)
   for (Int_t isv = 0; isv < fNsupv; isv++) {
      // (component ivar of support vector isv is streamed here)
      if (isv < fNsupv-1) fout << ", ";
   }
   if (ivar < GetNvar()-1) fout << ", " << std::endl;
   else fout << std::endl;
}
fout << "};" << std::endl << std::endl;
Log() << "The Support Vector Machine (SVM) builds a hyperplane separating" << Endl;
Log() << "signal and background events (vectors) using the minimal subset of " << Endl;
Log() << "all vectors used for training (support vectors). The extension to" << Endl;
Log() << "the non-linear case is performed by mapping input vectors into a " << Endl;
Log() << "higher-dimensional feature space in which linear separation is " << Endl;
Log() << "possible. The use of the kernel functions thereby eliminates the " << Endl;
Log() << "explicit transformation to the feature space. The implemented SVM " << Endl;
Log() << "algorithm performs the classification tasks using linear, polynomial, " << Endl;
Log() << "Gaussian and sigmoidal kernel functions. The Gaussian kernel allows " << Endl;
Log() << "one to apply any discriminant shape in the input space." << Endl;

Log() << "SVM is a general purpose non-linear classification method, which " << Endl;
Log() << "does not require data preprocessing like decorrelation or Principal " << Endl;
Log() << "Component Analysis. It generalises quite well and can handle analyses " << Endl;
Log() << "with large numbers of input variables." << Endl;

Log() << "Optimal performance requires primarily a proper choice of the kernel " << Endl;
Log() << "parameters (the width \"Sigma\" in case of Gaussian kernel) and the" << Endl;
Log() << "cost parameter \"C\". The user must optimise them empirically by running" << Endl;
Log() << "SVM several times with different parameter sets. The time needed for " << Endl;
Log() << "each evaluation scales like the square of the number of training " << Endl;
Log() << "events so that a coarse preliminary tuning should be performed on " << Endl;
Log() << "reduced data sets." << Endl;
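// A minimal booking sketch for reference (hypothetical factory/loader names;
// the option values are illustrative defaults, not recommendations -- cf. the
// options declared in MethodSVM::DeclareOptions):
//
//    factory->BookMethod(dataloader, TMVA::Types::kSVM, "SVM",
//                        "Gamma=0.25:C=1.0:Tol=0.001:VarTransform=Norm");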
std::map< TString, std::vector<Double_t> > optVars;
std::map< TString, std::vector<Double_t> >::iterator iter;
std::map<TString, TMVA::Interval*> tuneParameters;
std::map<TString, Double_t> tunedParameters;
tuneParameters.insert(std::pair<TString,Interval*>("Gamma", new Interval(0.01,1.,100)));
tuneParameters.insert(std::pair<TString,Interval*>("C",     new Interval(0.01,1.,100)));
for (iter = optVars.begin(); iter != optVars.end(); ++iter) {
   if (iter->first == "Gamma" || iter->first == "C") {
      tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
         new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
   }
   else {
      Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
   }
}
tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.01,1.,100)));
tuneParameters.insert(std::pair<TString,Interval*>("C",     new Interval(0.01,1.,100)));
for (iter = optVars.begin(); iter != optVars.end(); ++iter) {
   if (iter->first == "Theta" || iter->first == "C") {
      tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
         new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
   }
   else if (iter->first == "Order") {
      tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
         new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
   }
   else {
      Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
   }
}
// (inside a loop over the multi-gaussian components, with s holding the index)
string str = "Gamma_" + s.str();
tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(0.01,1.,100)));
tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
for (iter = optVars.begin(); iter != optVars.end(); ++iter) {
   if (iter->first == "GammaList") {
      // (one Gamma_i entry is created per multi-gaussian component)
      string str = "Gamma_" + s.str();
      tuneParameters.insert(std::pair<TString,Interval*>(str,
         new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
   }
   else if (iter->first == "C") {
      tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
         new Interval(iter->second.at(0), iter->second.at(1), iter->second.at(2))));
   }
   else {
      Log() << kWARNING << iter->first << " is not a recognised tuneable parameter." << Endl;
   }
}
while (std::getline(tempstring, value, '*')) {
   if (value == "RBF") {
      tuneParameters.insert(std::pair<TString,Interval*>("Gamma", new Interval(0.01,1.,100)));
   }
   else if (value == "MultiGauss") {
      // (one Gamma_i entry is created per multi-gaussian component)
      string str = "Gamma_" + s.str();
      tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(0.01,1.,100)));
   }
   else if (value == "Polynomial") {
      tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
      tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.0,1.0,101)));
   }
   else {
      Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
   }
}
tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
while (std::getline(tempstring, value, '+')) {
   if (value == "RBF") {
      tuneParameters.insert(std::pair<TString,Interval*>("Gamma", new Interval(0.01,1.,100)));
   }
   else if (value == "MultiGauss") {
      // (one Gamma_i entry is created per multi-gaussian component)
      string str = "Gamma_" + s.str();
      tuneParameters.insert(std::pair<TString,Interval*>(str, new Interval(0.01,1.,100)));
   }
   else if (value == "Polynomial") {
      tuneParameters.insert(std::pair<TString,Interval*>("Order", new Interval(1,10,10)));
      tuneParameters.insert(std::pair<TString,Interval*>("Theta", new Interval(0.0,1.0,101)));
   }
   else {
      Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
   }
}
tuneParameters.insert(std::pair<TString,Interval*>("C", new Interval(0.01,1.,100)));
Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
Log() << kINFO << " the following SVM parameters will be tuned on the respective *grid*\n" << Endl;
std::map<TString, TMVA::Interval*>::iterator it;
for (it = tuneParameters.begin(); it != tuneParameters.end(); ++it) {
   Log() << kWARNING << it->first << Endl;
   std::ostringstream oss;
   (it->second)->Print(oss);
   // (the interval description held in oss is logged here in the full source)
}
// (an OptimizeConfigParameters object named optimize is set up here)
tunedParameters = optimize.optimize();

return tunedParameters;
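// Typical call sequence (a sketch; `svm` must point at a booked MethodSVM):
//
//    std::map<TString, Double_t> tuned =
//       svm->OptimizeTuningParameters("ROCIntegral", "Minuit");
//
// The tuned values are then applied via SetTuneParameters() below.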
std::map<TString, Double_t>::iterator it;
for (it = tuneParameters.begin(); it != tuneParameters.end(); ++it) {
   Log() << kWARNING << it->first << " = " << it->second << Endl;
   if (it->first == "Gamma") {
      SetGamma(it->second);
   }
   else if (it->first == "C") {
      SetCost(it->second);
   }
   else {
      Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
   }
}
// (inside a loop over the multi-gaussian components, with s holding the index)
string str = "Gamma_" + s.str();
Log() << kWARNING << tuneParameters.find(str)->first << " = " << tuneParameters.find(str)->second << Endl;
fmGamma.push_back(tuneParameters.find(str)->second);
for (it = tuneParameters.begin(); it != tuneParameters.end(); ++it) {
   if (it->first == "C") {
      Log() << kWARNING << it->first << " = " << it->second << Endl;
      SetCost(it->second);
   }
}
for (it = tuneParameters.begin(); it != tuneParameters.end(); ++it) {
   Log() << kWARNING << it->first << " = " << it->second << Endl;
   if (it->first == "Order") {
      SetOrder(it->second);
   }
   else if (it->first == "Theta") {
      SetTheta(it->second);
   }
   else if (it->first == "C") {
      SetCost(it->second);
   }
   else if (it->first == "Mult") {
      // (the multiplier parameter is applied here in the full source)
   }
   else {
      Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
   }
}
for (it = tuneParameters.begin(); it != tuneParameters.end(); ++it) {
   bool foundParam = false;
   Log() << kWARNING << it->first << " = " << it->second << Endl;
   // (each multi-gaussian component name is checked in a loop over the components)
   string str = "Gamma_" + s.str();
   if (it->first == str) {
      // (the matching gamma is stored and foundParam is set to true)
   }
   if (it->first == "Gamma") {
      SetGamma(it->second);
   }
   else if (it->first == "Order") {
      SetOrder(it->second);
   }
   else if (it->first == "Theta") {
      SetTheta(it->second);
   }
   else if (it->first == "C") {
      SetCost(it->second);
   }
   else if (!foundParam) {
      Log() << kFATAL << " SetParameter for " << it->first << " not implemented " << Endl;
   }
}
Log() << kWARNING << fTheKernel << " is not a recognised kernel function." << Endl;
std::stringstream tempstring(mg);
while (tempstring >> value) {
   fmGamma.push_back(value);
   if (tempstring.peek() == ',') {
      tempstring.ignore();
   }
}
std::ostringstream tempstring;
for (UInt_t i = 0; i < gammas.size(); ++i) {
   tempstring << gammas.at(i);
   if (i != (gammas.size()-1)) {
      tempstring << ",";
   }
}
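// Round-trip example for the two helpers above: SetMGamma("0.1,0.2,0.3")
// fills fmGamma with {0.1, 0.2, 0.3}, and GetMGamma(fmGamma) rebuilds the
// comma-separated string "0.1,0.2,0.3" for the XML weight file.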
std::vector<TMVA::SVKernelFunction::EKernelType> kernelsList;
std::stringstream tempstring(multiKernels);
std::string value;
if (kernel == "Prod") {
   while (std::getline(tempstring, value, '*')) {
      if (value == "RBF") {
         // (the RBF kernel type is appended to kernelsList)
      }
      else if (value == "MultiGauss") {
         // (the multi-gaussian kernel type is appended and its gammas prepared)
      }
      else {
         Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
      }
   }
}
else if (kernel == "Sum") {
   while (std::getline(tempstring, value, '+')) {
      if (value == "RBF") {
         // (the RBF kernel type is appended to kernelsList)
      }
      else if (value == "MultiGauss") {
         // (the multi-gaussian kernel type is appended and its gammas prepared)
      }
      else {
         Log() << kWARNING << value << " is not a recognised kernel function." << Endl;
      }
   }
}
else {
   Log() << kWARNING << "Unable to split MultiKernels. Delimiters */+ required." << Endl;
}
std::map< TString, std::vector<Double_t> > optVars;
std::stringstream tempstring(fTune);
std::string value;
while (std::getline(tempstring, value, ',')) {
   unsigned first = value.find('[') + 1;
   unsigned last  = value.find_last_of(']');
   std::string optParam = value.substr(0, first-1);
   std::stringstream strNew(value.substr(first, last-first));
   Double_t optInterval;
   std::vector<Double_t> tempVec;
   while (strNew >> optInterval) {
      tempVec.push_back(optInterval);
      if (strNew.peek() == ';') {
         strNew.ignore();
      }
   }
   // Default any missing interval fields: {start, stop, number of steps}.
   for (UInt_t i = 0; i < 3; ++i) {
      if (i != 3 && i == tempVec.size()) {
         if (optParam == "C" || optParam == "Gamma" || optParam == "GammaList" || optParam == "Theta") {
            switch (i) {
            case 0: tempVec.push_back(0.01); break;
            case 1: tempVec.push_back(1.);   break;
            case 2: tempVec.push_back(100);  break;
            }
         }
         else if (optParam == "Order") {
            switch (i) {
            case 0: tempVec.push_back(1);  break;
            case 1: tempVec.push_back(10); break;
            case 2: tempVec.push_back(10); break;
            }
         }
         else {
            Log() << kWARNING << optParam << " is not a recognised tuneable parameter." << Endl;
         }
      }
   }
   optVars.insert(std::pair< TString, std::vector<Double_t> >(optParam, tempVec));
}
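// Example of the fTune syntax parsed above (illustrative values): the string
//    "C[0.01;1.;100],Gamma[0.01;1.;100]"
// is split at ',' into one entry per parameter, and each '[' ... ']' payload
// is split at ';' into {start, stop, number of steps}; entries with fewer
// than three fields are completed with the defaults pushed above.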
// Classification case
if (lossFunction == "hinge") {
   // (hinge loss accumulated over the testing events)
}
else if (lossFunction == "exp") {
   // (exponential loss accumulated over the testing events)
}
else if (lossFunction == "binomial") {
   // (binomial loss accumulated over the testing events)
}
else {
   Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
}
// Regression case
if (lossFunction == "hinge") {
   // (hinge loss accumulated over the testing events)
}
else if (lossFunction == "exp") {
   // (exponential loss accumulated over the testing events)
}
else if (lossFunction == "binomial") {
   // (binomial loss accumulated over the testing events)
}
else {
   Log() << kWARNING << lossFunction << " is not a recognised loss function." << Endl;
}
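// Standard textbook forms of the three loss functions named above (TMVA's
// implementation may weight or normalise them differently): for a true
// label y in {-1,+1} and classifier response f(x),
//    hinge:    L = max(0, 1 - y*f(x))
//    exp:      L = exp(-y*f(x))
//    binomial: L = log(1 + exp(-2*y*f(x)))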