47 #ifndef ROOT_TMVA_Tools 50 #ifndef ROOT_TMVA_Timer 54 #ifndef ROOT_TMVA_SVWorkingSet 58 #ifndef ROOT_TMVA_SVEvent 62 #ifndef ROOT_TMVA_SVKernelFunction 90 using std::stringstream;
112 , fSVKernelFunction(0)
115 , fDoubleSigmaSquared(0)
127 fNumVars = theData.GetVariableInfos().size();
128 for(
int i=0; i<fNumVars; i++){
129 fVarNames.push_back(theData.GetVariableInfos().at(i).GetTitle());
147 , fSVKernelFunction(0)
150 , fDoubleSigmaSquared(0)
199 Log() <<
kDEBUG <<
" successfully(?) reset the method " <<
Endl;
223 fInputData =
new std::vector<TMVA::SVEvent*>(0);
281 Log() <<
kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: " 283 <<
" --> please remove \"IgnoreNegWeightsInTraining\" option from booking string." 307 if(nSignal < nBackground){
309 CBkg = CSig*((double)nSignal/nBackground);
313 CSig = CBkg*((double)nSignal/nBackground);
387 Log() <<
kINFO <<
"Sorry, no computing time forecast available for SVM, please wait ..." <<
Endl;
418 for (std::vector<TMVA::SVEvent*>::iterator veciter=
fSupportVectors->begin();
421 temp[0] = (*veciter)->GetNs();
422 temp[1] = (*veciter)->GetTypeFlag();
423 temp[2] = (*veciter)->GetAlpha();
424 temp[3] = (*veciter)->GetAlpha_p();
426 temp[ivar+4] = (*(*veciter)->GetDataVector())[ivar];
455 std::vector<Float_t>* svector =
new std::vector<Float_t>(
GetNvar());
468 for (
UInt_t ievt = 0; ievt < fNsupv; ievt++) {
472 typeFlag=(int)temp[1];
475 for (
UInt_t ivar = 0; ivar <
GetNvar(); ivar++) (*svector)[ivar]=temp[ivar+4];
481 void* maxminnode = supportvectornode;
540 std::vector<Float_t>* svector =
new std::vector<Float_t>(
GetNvar());
545 for (
UInt_t ievt = 0; ievt < fNsupv; ievt++) {
548 typeFlag = typeTalpha<0?-1:1;
549 alpha = typeTalpha<0?-typeTalpha:typeTalpha;
550 for (
UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> svector->at(ivar);
555 for (
UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMaxVars)[ivar];
557 for (
UInt_t ivar = 0; ivar < GetNvar(); ivar++) istr >> (*fMinVars)[ivar];
569 Log() <<
kFATAL <<
"Unknown kernel function found in weight file!" <<
Endl;
648 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
649 fout <<
" float fBparameter;" << std::endl;
650 fout <<
" int fNOfSuppVec;" << std::endl;
651 fout <<
" static float fAllSuppVectors[][" << fNsupv <<
"];" << std::endl;
652 fout <<
" static float fAlphaTypeCoef[" << fNsupv <<
"];" << std::endl;
654 fout <<
" // Kernel parameter(s) " << std::endl;
655 fout <<
" float fGamma;" << std::endl;
656 fout <<
"};" << std::endl;
657 fout <<
"" << std::endl;
660 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
661 fout <<
"{" << std::endl;
662 fout <<
" fBparameter = " <<
fBparm <<
";" << std::endl;
663 fout <<
" fNOfSuppVec = " << fNsupv <<
";" << std::endl;
664 fout <<
" fGamma = " <<
fGamma <<
";" <<std::endl;
665 fout <<
"}" << std::endl;
669 fout <<
"inline double " << className <<
"::GetMvaValue__(const std::vector<double>& inputValues ) const" << std::endl;
670 fout <<
"{" << std::endl;
671 fout <<
" double mvaval = 0; " << std::endl;
672 fout <<
" double temp = 0; " << std::endl;
674 fout <<
" for (int ievt = 0; ievt < fNOfSuppVec; ievt++ ){" << std::endl;
675 fout <<
" temp = 0;" << std::endl;
676 fout <<
" for ( unsigned int ivar = 0; ivar < GetNvar(); ivar++ ) {" << std::endl;
678 fout <<
" temp += (fAllSuppVectors[ivar][ievt] - inputValues[ivar]) " << std::endl;
679 fout <<
" * (fAllSuppVectors[ivar][ievt] - inputValues[ivar]); " << std::endl;
680 fout <<
" }" << std::endl;
681 fout <<
" mvaval += fAlphaTypeCoef[ievt] * exp( -fGamma * temp ); " << std::endl;
683 fout <<
" }" << std::endl;
684 fout <<
" mvaval -= fBparameter;" << std::endl;
685 fout <<
" return 1./(1. + exp(mvaval));" << std::endl;
686 fout <<
"}" << std::endl;
687 fout <<
"// Clean up" << std::endl;
688 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
689 fout <<
"{" << std::endl;
690 fout <<
" // nothing to clear " << std::endl;
691 fout <<
"}" << std::endl;
692 fout <<
"" << std::endl;
695 fout <<
"float " << className <<
"::fAlphaTypeCoef[] =" << std::endl;
697 for (
Int_t isv = 0; isv < fNsupv; isv++) {
699 if (isv < fNsupv-1) fout <<
", ";
701 fout <<
" };" << std::endl << std::endl;
703 fout <<
"float " << className <<
"::fAllSuppVectors[][" << fNsupv <<
"] =" << std::endl;
708 for (
Int_t isv = 0; isv < fNsupv; isv++){
710 if (isv < fNsupv-1) fout <<
", ";
713 if (ivar <
GetNvar()-1) fout <<
", " << std::endl;
714 else fout << std::endl;
716 fout <<
"};" << std::endl<< std::endl;
730 Log() <<
"The Support Vector Machine (SVM) builds a hyperplance separating" <<
Endl;
731 Log() <<
"signal and background events (vectors) using the minimal subset of " <<
Endl;
732 Log() <<
"all vectors used for training (support vectors). The extension to" <<
Endl;
733 Log() <<
"the non-linear case is performed by mapping input vectors into a " <<
Endl;
734 Log() <<
"higher-dimensional feature space in which linear separation is " <<
Endl;
735 Log() <<
"possible. The use of the kernel functions thereby eliminates the " <<
Endl;
736 Log() <<
"explicit transformation to the feature space. The implemented SVM " <<
Endl;
737 Log() <<
"algorithm performs the classification tasks using linear, polynomial, " <<
Endl;
738 Log() <<
"Gaussian and sigmoidal kernel functions. The Gaussian kernel allows " <<
Endl;
739 Log() <<
"to apply any discriminant shape in the input space." <<
Endl;
743 Log() <<
"SVM is a general purpose non-linear classification method, which " <<
Endl;
744 Log() <<
"does not require data preprocessing like decorrelation or Principal " <<
Endl;
745 Log() <<
"Component Analysis. It generalises quite well and can handle analyses " <<
Endl;
746 Log() <<
"with large numbers of input variables." <<
Endl;
750 Log() <<
"Optimal performance requires primarily a proper choice of the kernel " <<
Endl;
751 Log() <<
"parameters (the width \"Sigma\" in case of Gaussian kernel) and the" <<
Endl;
752 Log() <<
"cost parameter \"C\". The user must optimise them empirically by running" <<
Endl;
753 Log() <<
"SVM several times with different parameter sets. The time needed for " <<
Endl;
754 Log() <<
"each evaluation scales like the square of the number of training " <<
Endl;
755 Log() <<
"events so that a coarse preliminary tuning should be performed on " <<
Endl;
756 Log() <<
"reduced data sets." <<
Endl;
769 std::map< TString,std::vector<Double_t> > optVars;
774 std::map< TString,std::vector<Double_t> >::iterator iter;
776 std::map<TString,TMVA::Interval*> tuneParameters;
777 std::map<TString,Double_t> tunedParameters;
784 tuneParameters.insert(std::pair<TString,Interval*>(
"Gamma",
new Interval(0.01,1.,100)));
785 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
788 for(iter=optVars.begin(); iter!=optVars.end(); iter++){
789 if( iter->first ==
"Gamma" || iter->first ==
"C"){
790 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
793 Log() <<
kWARNING << iter->first <<
" is not a recognised tuneable parameter." <<
Endl;
801 tuneParameters.insert(std::pair<TString,Interval*>(
"Order",
new Interval(1,10,10)));
802 tuneParameters.insert(std::pair<TString,Interval*>(
"Theta",
new Interval(0.01,1.,100)));
803 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
806 for(iter=optVars.begin(); iter!=optVars.end(); iter++){
807 if( iter->first ==
"Theta" || iter->first ==
"C"){
808 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
810 else if( iter->first ==
"Order"){
811 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
814 Log() <<
kWARNING << iter->first <<
" is not a recognised tuneable parameter." <<
Endl;
825 string str =
"Gamma_" + s.str();
826 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(0.01,1.,100)));
828 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
830 for(iter=optVars.begin(); iter!=optVars.end(); iter++){
831 if( iter->first ==
"GammaList"){
835 string str =
"Gamma_" + s.str();
836 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
839 else if( iter->first ==
"C"){
840 tuneParameters.insert(std::pair<TString,Interval*>(iter->first,
new Interval(iter->second.at(0),iter->second.at(1),iter->second.at(2))));
843 Log() <<
kWARNING << iter->first <<
" is not a recognised tuneable parameter." <<
Endl;
852 while (std::getline(tempstring,value,
'*')){
854 tuneParameters.insert(std::pair<TString,Interval*>(
"Gamma",
new Interval(0.01,1.,100)));
856 else if(value ==
"MultiGauss"){
860 string str =
"Gamma_" + s.str();
861 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(0.01,1.,100)));
864 else if(value ==
"Polynomial"){
865 tuneParameters.insert(std::pair<TString,Interval*>(
"Order",
new Interval(1,10,10)));
866 tuneParameters.insert(std::pair<TString,Interval*>(
"Theta",
new Interval(0.0,1.0,101)));
869 Log() <<
kWARNING << value <<
" is not a recognised kernel function." <<
Endl;
873 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
878 while (std::getline(tempstring,value,
'+')){
880 tuneParameters.insert(std::pair<TString,Interval*>(
"Gamma",
new Interval(0.01,1.,100)));
882 else if(value ==
"MultiGauss"){
886 string str =
"Gamma_" + s.str();
887 tuneParameters.insert(std::pair<TString,Interval*>(str,
new Interval(0.01,1.,100)));
890 else if(value ==
"Polynomial"){
891 tuneParameters.insert(std::pair<TString,Interval*>(
"Order",
new Interval(1,10,10)));
892 tuneParameters.insert(std::pair<TString,Interval*>(
"Theta",
new Interval(0.0,1.0,101)));
895 Log() <<
kWARNING << value <<
" is not a recognised kernel function." <<
Endl;
899 tuneParameters.insert(std::pair<TString,Interval*>(
"C",
new Interval(0.01,1.,100)));
905 Log() <<
kINFO <<
" the following SVM parameters will be tuned on the respective *grid*\n" <<
Endl;
906 std::map<TString,TMVA::Interval*>::iterator it;
907 for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
909 std::ostringstream oss;
910 (it->second)->
Print(oss);
915 tunedParameters=optimize.
optimize();
917 return tunedParameters;
925 std::map<TString,Double_t>::iterator it;
927 for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
929 if (it->first ==
"Gamma"){
932 else if(it->first ==
"C"){
936 Log() <<
kFATAL <<
" SetParameter for " << it->first <<
" not implemented " <<
Endl;
945 string str =
"Gamma_" + s.str();
946 Log() <<
kWARNING << tuneParameters.find(str)->first <<
" = " << tuneParameters.find(str)->second <<
Endl;
947 fmGamma.push_back(tuneParameters.find(str)->second);
949 for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
950 if (it->first ==
"C"){
958 for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
960 if (it->first ==
"Order"){
963 else if (it->first ==
"Theta"){
966 else if(it->first ==
"C"){
SetCost (it->second);
968 else if(it->first ==
"Mult"){
972 Log() <<
kFATAL <<
" SetParameter for " << it->first <<
" not implemented " <<
Endl;
978 for(it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
979 bool foundParam =
false;
984 string str =
"Gamma_" + s.str();
985 if(it->first == str){
990 if (it->first ==
"Gamma"){
994 else if (it->first ==
"Order"){
998 else if (it->first ==
"Theta"){
1002 else if (it->first ==
"C"){
SetCost (it->second);
1008 Log() <<
kFATAL <<
" SetParameter for " << it->first <<
" not implemented " <<
Endl;
1025 std::stringstream tempstring(mg);
1027 while (tempstring >> value){
1030 if (tempstring.peek() ==
','){
1031 tempstring.ignore();
1039 std::ostringstream tempstring;
1040 for(
UInt_t i = 0; i<gammas.size(); ++i){
1041 tempstring << gammas.at(i);
1042 if(i!=(gammas.size()-1)){
1058 std::vector<TMVA::SVKernelFunction::EKernelType> kernelsList;
1059 std::stringstream tempstring(multiKernels);
1062 while (std::getline(tempstring,value,
'*')){
1064 else if(value ==
"MultiGauss"){
1072 Log() <<
kWARNING << value <<
" is not a recognised kernel function." <<
Endl;
1077 else if(kernel==
"Sum"){
1078 while (std::getline(tempstring,value,
'+')){
1080 else if(value ==
"MultiGauss"){
1088 Log() <<
kWARNING << value <<
" is not a recognised kernel function." <<
Endl;
1094 Log() <<
kWARNING <<
"Unable to split MultiKernels. Delimiters */+ required." <<
Endl;
1110 std::map< TString,std::vector<Double_t> > optVars;
1111 std::stringstream tempstring(
fTune);
1113 while (std::getline(tempstring,value,
',')){
1114 unsigned first = value.find(
'[')+1;
1115 unsigned last = value.find_last_of(
']');
1116 std::string optParam = value.substr(0,first-1);
1117 std::stringstream strNew (value.substr(first,last-first));
1119 std::vector<Double_t> tempVec;
1121 while (strNew >> optInterval){
1122 tempVec.push_back(optInterval);
1123 if (strNew.peek() ==
';'){
1128 if(i != 3 && i == tempVec.size()){
1129 if(optParam ==
"C" || optParam ==
"Gamma" || optParam ==
"GammaList" || optParam ==
"Theta"){
1132 tempVec.push_back(0.01);
1134 tempVec.push_back(1.);
1136 tempVec.push_back(100);
1139 else if(optParam ==
"Order"){
1142 tempVec.push_back(1);
1144 tempVec.push_back(10);
1146 tempVec.push_back(10);
1150 Log() <<
kWARNING << optParam <<
" is not a recognised tuneable parameter." <<
Endl;
1154 optVars.insert(std::pair<
TString,std::vector<Double_t> >(optParam,tempVec));
1175 if(lossFunction ==
"hinge"){
1178 else if(lossFunction ==
"exp"){
1181 else if(lossFunction ==
"binomial"){
1185 Log() <<
kWARNING << lossFunction <<
" is not a recognised loss function." <<
Endl;
1190 if(lossFunction ==
"hinge"){
1193 else if(lossFunction ==
"exp"){
1196 else if(lossFunction ==
"binomial"){
1200 Log() <<
kWARNING << lossFunction <<
" is not a recognised loss function." <<
Endl;
void Train(void)
Train SVM.
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
std::vector< TMVA::SVKernelFunction::EKernelType > MakeKernelList(std::string multiKernels, TString kernel)
MakeKernelList Function providing string manipulation for product or sum of kernels functions to take...
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
MsgLogger & Endl(MsgLogger &ml)
Float_t fDoubleSigmaSquared
std::map< TString, std::vector< Double_t > > GetTuningOptions()
GetTuningOptions Function to allow for ranges and number of steps (for scan) when optimising kernel f...
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
std::string fMultiKernels
void Train(UInt_t nIter=1000)
train the SVM
std::vector< TMVA::SVEvent * > * fInputData
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
void AddWeightsXMLTo(void *parent) const
write configuration to xml file
void ProcessOptions()
option post processing (if necessary)
TString GetElapsedTime(Bool_t Scientific=kTRUE)
void setCompatibilityParams(EKernelType k, UInt_t order, Float_t theta, Float_t kappa)
set old options for compatibility mode
std::vector< TString > fVarNames
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
Set the tuning parameters according to the argument.
std::vector< Float_t > fmGamma
void SetOrder(Double_t o)
const Event * GetEvent() const
void GetMGamma(const std::vector< float > &gammas)
Produces GammaList string for multigaussian kernel to be written to xml file.
Double_t GetXmin(Int_t ivar) const
DataSetInfo & DataInfo() const
Bool_t DoRegression() const
void DeclareOptions()
declare options available for this method
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
TVectorT< Double_t > TVectorD
virtual void Print(Option_t *option="") const
Print TNamed name and title.
UInt_t GetNEvents() const
temporary event used when testing on a different DataSet than its own
Double_t GetXmax(Int_t ivar) const
void SetGamma(Double_t g)
Float_t GetTarget(UInt_t itgt) const
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
void ReadWeightsFromXML(void *wghtnode)
void ReadWeightsFromStream(std::istream &istr)
MethodSVM(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
const char * GetName() const
std::map< TString, Double_t > optimize()
void Init(void)
default initialisation
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance (here apparently called resultsName i...
SVKernelFunction * fSVKernelFunction
void SetTheta(Double_t t)
const TString & GetMethodName() const
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="Minuit")
Optimize Tuning Parameters This is used to optimise the kernel function parameters and cost...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
SVM can handle classification with 2 classes and regression with one regression-target.
void SetNormalised(Bool_t norm)
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
std::vector< TMVA::SVEvent * > * fSupportVectors
void WriteWeightsToStream(TFile &fout) const
TODO write IT write training sample (TTree) to file.
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Double_t getLoss(TString lossFunction)
getLoss Calculates loss for testing dataset.
std::vector< TMVA::SVEvent * > * GetSupportVectors()
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
void SetCurrentType(Types::ETreeType type) const
void GetHelpMessage() const
get help message text
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
IPythonInteractive * fInteractive
virtual ~MethodSVM(void)
destructor
TString GetMethodTypeName() const
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Bool_t IsSignal(const Event *ev) const
std::vector< Float_t > * fRegressionReturnVal
Types::EAnalysisType GetAnalysisType() const
void SetIPythonInteractive(bool *ExitFromTraining, UInt_t *fIPyCurrentIter_)
void SetMGamma(std::string &mg)
Takes as input a string of values for multigaussian gammas and splits it, filling the gamma vector re...
std::vector< VariableInfo > & GetVariableInfos()
Float_t Evaluate(SVEvent *ev1, SVEvent *ev2)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
const std::vector< Float_t > & GetRegressionValues()