134 fFitMethod ( kUseGeneticAlgorithm ),
135 fEffMethod ( kUseEventSelection ),
160 fVarHistS_smooth( 0 ),
161 fVarHistB_smooth( 0 ),
172 const TString& theWeightFile) :
321 DeclareOptionRef(
fFitMethodS =
"GA",
"FitMethod",
"Minimisation Method (GA, SA, and MC are the primary methods to be used; the others have been introduced for testing purposes and are depreciated)");
366 Log() << kWARNING <<
"Normalisation of the input variables for cut optimisation is not" <<
Endl;
367 Log() << kWARNING <<
"supported because this provides intransparent cut values, and no" <<
Endl;
368 Log() << kWARNING <<
"improvement in the performance of the algorithm." <<
Endl;
369 Log() << kWARNING <<
"Please remove \"Normalise\" option from booking option string" <<
Endl;
370 Log() << kWARNING <<
"==> Will reset normalisation flag to \"False\"" <<
Endl;
375 Log() << kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: " 377 <<
" --> Please remove \"IgnoreNegWeightsInTraining\" option from booking string." 387 Log() << kWARNING <<
"poor performance of MINUIT in MethodCuts; preferred fit method: GA" <<
Endl;
397 Log() << kINFO <<
Form(
"Use optimization method: \"%s\"",
402 Log() << kINFO <<
Form(
"Use efficiency computation method: \"%s\"",
418 Log() << kFATAL <<
"unknown value \'" <<
fAllVarsI[ivar]
419 <<
"\' for fit parameter option " <<
Form(
"VarProp[%i]",ivar) <<
Endl;
421 (*fFitParams)[ivar] = theFitP;
425 <<
"\" cuts for variable: " <<
"'" << (*fInputVars)[ivar] <<
"'" <<
Endl;
439 Log() << kFATAL <<
"<Eval_Cuts> fCutMin/Max have zero pointer. " 440 <<
"Did you book Cuts ?" <<
Endl;
449 if (ibin < 0 ) ibin = 0;
457 return passed ? 1. : 0. ;
467 std::vector<Double_t> cutsMin;
468 std::vector<Double_t> cutsMax;
474 std::vector<TString>* varVec = 0;
477 varVec =
new std::vector<TString>;
478 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
479 varVec->push_back(
DataInfo().GetVariableInfo(ivar).GetLabel() );
488 varVec =
new std::vector<TString>;
489 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
490 varVec->push_back(
DataInfo().GetVariableInfo(ivar).GetLabel() +
" [transformed]" );
495 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
496 if ((
UInt_t)(*varVec)[ivar].Length() > maxL) maxL = (*varVec)[ivar].Length();
498 UInt_t maxLine = 20+maxL+16;
500 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
502 Log() << kHEADER <<
"Cut values for requested signal efficiency: " << trueEffS <<
Endl;
505 Log() << kINFO <<
"Transformation applied to input variables : \"" 510 <<
" transformations applied in transformation chain; cuts applied on transformed quantities ] " <<
Endl;
513 Log() << kINFO <<
"Transformation applied to input variables : None" <<
Endl;
515 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
517 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
519 <<
"Cut[" << std::setw(2) << ivar <<
"]: " 520 << std::setw(10) << cutsMin[ivar]
522 << std::setw(maxL) << (*varVec)[ivar]
524 << std::setw(10) << cutsMax[ivar] <<
Endl;
526 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
538 std::vector<Double_t> cMin(
GetNvar() );
539 std::vector<Double_t> cMax(
GetNvar() );
542 cutMin[ivar] = cMin[ivar];
543 cutMax[ivar] = cMax[ivar];
552 std::vector<Double_t>& cutMin,
553 std::vector<Double_t>& cutMax )
const 562 if (ibin < 0 ) ibin = 0;
568 cutMin.push_back(
fCutMin[ivar][ibin] );
569 cutMax.push_back(
fCutMax[ivar][ibin] );
620 std::vector<TH1F*> signalDist, bkgDist;
638 std::vector<Interval*> ranges;
695 for (
UInt_t ivar=0; ivar<ranges.size(); ivar++)
delete ranges[ivar];
706 Int_t nsamples =
Int_t(0.5*nevents*(nevents - 1));
710 Log() << kINFO <<
"Running full event scan: " <<
Endl;
711 for (
Int_t ievt1=0; ievt1<nevents; ievt1++) {
712 for (
Int_t ievt2=ievt1+1; ievt2<nevents; ievt2++) {
727 Int_t nsamples = 200000;
729 DeclareOptionRef( nsamples,
"SampleSize",
"Number of Monte-Carlo-Event samples" );
730 DeclareOptionRef( seed,
"Seed",
"Seed for the random generator (0 takes random seeds)" );
743 Log() << kINFO <<
"Running Monte-Carlo-Event sampling over " << nsamples <<
" events" <<
Endl;
744 std::vector<Double_t> pars( 2*
GetNvar() );
746 for (
Int_t itoy=0; itoy<nsamples; itoy++) {
765 evt1 = ev1->GetValue( ivar );
771 if (nbreak++ > 10000) {
772 Log() << kFATAL <<
"<MCEvents>: could not find signal events" 773 <<
" after 10000 trials - do you have signal events in your sample ?" 780 if (evt1 > evt2) {
Double_t z = evt1; evt1 = evt2; evt2 =
z; }
782 pars[2*ivar+1] = evt2 - evt1;
837 if (!
DataInfo().IsSignal(ev1))
return -1;
840 if (!
DataInfo().IsSignal(ev2))
return -1;
846 for (
Int_t ivar=0; ivar<nvar; ivar++) {
852 std::vector<Double_t> pars;
853 for (
Int_t ivar=0; ivar<nvar; ivar++) {
856 if (evt1[ivar] < evt2[ivar]) {
865 pars.push_back( cutMin );
866 pars.push_back( cutMax - cutMin );
928 Double_t average = 0.5*(effBH_left + effBH_right);
929 if (effBH < effB) average = effBH;
933 eta = ( -
TMath::Abs(effBH-average) + (1.0 - (effBH - effB))) / (1.0 + effS);
940 if (effBH < 0 || effBH > effB) {
962 penalty+=4.*diff*diff;
965 if (effS<1.
e-4)
return 10.0+penalty;
966 else return 10.*(1.-10.*effS);
979 cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] : pars[ipar] - pars[ipar+1];
980 cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] + pars[ipar+1] : pars[ipar];
990 if (ibin < 1 || ibin >
fNbins)
Log() << kFATAL <<
"::MatchCutsToPars: bin error: " 996 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
997 cutMin[ivar] = cutMinAll[ivar][ibin-1];
998 cutMax[ivar] = cutMaxAll[ivar][ibin-1];
1013 Int_t ipar = 2*ivar;
1014 pars[ipar] = ((*fRangeSign)[ivar] > 0) ? cutMin[ivar] : cutMax[ivar];
1015 pars[ipar+1] = cutMax[ivar] - cutMin[ivar];
1029 effS *= (*fVarPdfS)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
1030 effB *= (*fVarPdfB)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
1036 if( !
fNegEffWarning )
Log() << kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1041 if( !
fNegEffWarning )
Log() << kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1069 if (nTotS == 0 && nTotB == 0) {
1070 Log() << kFATAL <<
"<GetEffsfromSelection> fatal error in zero total number of events:" 1071 <<
" nTotS, nTotB: " << nTotS <<
" " << nTotB <<
" ***" <<
Endl;
1078 Log() << kWARNING <<
"<ComputeEstimator> zero number of signal events" <<
Endl;
1080 else if (nTotB == 0) {
1083 Log() << kWARNING <<
"<ComputeEstimator> zero number of background events" <<
Endl;
1093 if( !
fNegEffWarning )
Log() << kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1098 if( !
fNegEffWarning )
Log() << kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1124 if( val > minVal ) minVal = val;
1125 if( val < maxVal ) maxVal = val;
1131 TString histTitle = (*fInputVars)[ivar] +
" signal training";
1132 TString histName = (*fInputVars)[ivar] +
"_sig";
1143 (*fVarHistS)[ivar] =
new TH1F(histName.
Data(), histTitle.
Data(),
fNbins, minVal, maxVal );
1146 histTitle = (*fInputVars)[ivar] +
" background training";
1147 histName = (*fInputVars)[ivar] +
"_bgd";
1158 (*fVarHistB)[ivar] =
new TH1F(histName.
Data(), histTitle.Data(),
fNbins, minVal, maxVal );
1164 (*fVarHistS)[ivar]->Fill( val );
1166 (*fVarHistB)[ivar]->Fill( val );
1173 (*fVarHistS_smooth)[ivar] = (
TH1F*)(*
fVarHistS)[ivar]->Clone();
1174 histTitle = (*fInputVars)[ivar] +
" signal training smoothed ";
1175 histTitle += nsmooth;
1176 histTitle +=
" times";
1177 histName = (*fInputVars)[ivar] +
"_sig_smooth";
1178 (*fVarHistS_smooth)[ivar]->SetName(histName);
1179 (*fVarHistS_smooth)[ivar]->SetTitle(histTitle);
1182 (*fVarHistS_smooth)[ivar]->Smooth(nsmooth);
1197 (*fVarHistB_smooth)[ivar] = (
TH1F*)(*fVarHistB)[ivar]->
Clone();
1198 histTitle = (*fInputVars)[ivar]+
" background training smoothed ";
1199 histTitle += nsmooth;
1200 histTitle +=
" times";
1201 histName = (*fInputVars)[ivar]+
"_bgd_smooth";
1202 (*fVarHistB_smooth)[ivar]->SetName(histName);
1203 (*fVarHistB_smooth)[ivar]->SetTitle(histTitle);
1206 (*fVarHistB_smooth)[ivar]->Smooth(nsmooth);
1223 istr >> dummy >>
dummy;
1228 istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >> dummyInt >>
dummy ;
1232 Log() << kFATAL <<
"<ReadWeightsFromStream> fatal error: mismatch " 1239 Log() << kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1242 Log() << kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1245 Log() << kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1248 Log() << kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1251 Log() << kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1256 Log() << kINFO <<
"in " << fNbins <<
" signal efficiency bins and for " <<
GetNvar() <<
" variables" <<
Endl;
1260 istr.getline(buffer,200);
1261 istr.getline(buffer,200);
1267 TString(
GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
1271 istr >> tmpbin >> tmpeffS >> tmpeffB;
1290 std::vector<Double_t> cutsMin;
1291 std::vector<Double_t> cutsMax;
1297 gTools().
AddComment( wght,
Form(
"Below are the optimised cuts for %i variables: Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0] ... cutMin[ivar=n-1] cutMax[ivar=n-1]",
GetNvar() ) );
1337 Int_t tmpEffMethod, tmpFitMethod;
1338 gTools().
ReadAttr( wghtnode,
"OptimisationMethod", tmpEffMethod );
1347 Log() << kINFO <<
"Read cuts optimised using sample of MC events" <<
Endl;
1350 Log() << kINFO <<
"Read cuts optimised using sample of MC-Event events" <<
Endl;
1353 Log() << kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1356 Log() << kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1359 Log() << kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1364 Log() << kINFO <<
"Reading " <<
fNbins <<
" signal efficiency bins for " <<
GetNvar() <<
" variables" <<
Endl;
1394 if (tmpbin-1 >=
fNbins || tmpbin-1 < 0) {
1395 Log() << kFATAL <<
"Mismatch in bins: " << tmpbin-1 <<
" >= " <<
fNbins <<
Endl;
1420 (*fVarHistS)[ivar]->Write();
1421 (*fVarHistB)[ivar]->Write();
1422 (*fVarHistS_smooth)[ivar]->Write();
1423 (*fVarHistB_smooth)[ivar]->Write();
1424 (*fVarPdfS)[ivar]->GetPDFHist()->Write();
1425 (*fVarPdfB)[ivar]->GetPDFHist()->Write();
1448 Log() << kFATAL <<
"<GetTrainingEfficiency> wrong number of arguments" 1449 <<
" in string: " << theString
1450 <<
" | required format, e.g., Efficiency:0.05" <<
Endl;
1463 if (results->
GetHist(
"EFF_BVSS_TR")==0) {
1483 results->
Store(eff_bvss_tr,
"EFF_BVSS_TR");
1484 results->
Store(rej_bvss_tr,
"REJ_BVSS_TR");
1491 Int_t nFailedBins=0;
1494 tmpCutMin[ivar] =
fCutMin[ivar][bini-1];
1495 tmpCutMax[ivar] =
fCutMax[ivar][bini-1];
1502 if (effBin != bini){
1503 Log()<< kVERBOSE <<
"unable to fill efficiency bin " << bini<<
" " << effBin <<
Endl;
1512 if (nFailedBins>0)
Log()<< kWARNING <<
" unable to fill "<< nFailedBins <<
" efficiency bins " <<
Endl;
1514 delete [] tmpCutMin;
1515 delete [] tmpCutMax;
1525 Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
1526 Int_t nbins_ = 1000;
1529 for (
Int_t bini=1; bini<=nbins_; bini++) {
1531 effS = (bini - 0.5)/
Float_t(nbins_);
1535 if ((effB - effBref)*(effB_ - effBref) < 0)
break;
1540 return 0.5*(effS + effS_);
1566 Log() << kFATAL <<
"<GetEfficiency> wrong number of arguments" 1567 <<
" in string: " << theString
1568 <<
" | required format, e.g., Efficiency:0.05, or empty string" <<
Endl;
1583 if (results->
GetHist(
"MVA_EFF_BvsS")==0) {
1607 results->
Store(eff_BvsS,
"MVA_EFF_BvsS");
1608 results->
Store(rej_BvsS);
1617 results->
Store(eff_s,
"MVA_S");
1618 results->
Store(eff_b,
"MVA_B");
1630 tmpCutMin[ivar] =
fCutMin[ivar][bini-1];
1631 tmpCutMax[ivar] =
fCutMax[ivar][bini-1];
1636 tmpBvsS->
SetPoint(bini, effS, effB);
1643 delete [] tmpCutMin;
1644 delete [] tmpCutMax;
1660 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
1661 Int_t nbins_ = 1000;
1667 for (
Int_t bini=1; bini<=nbins_; bini++) {
1670 effS = (bini - 0.5)/
Float_t(nbins_);
1672 integral += (1.0 - effB);
1681 for (
Int_t bini=1; bini<=nbins_; bini++) {
1683 effS = (bini - 0.5)/
Float_t(nbins_);
1687 if ((effB - effBref)*(effB_ - effBref) < 0)
break;
1692 effS = 0.5*(effS + effS_);
1709 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
1710 fout <<
"};" << std::endl;
1728 Log() <<
"The optimisation of rectangular cuts performed by TMVA maximises " <<
Endl;
1729 Log() <<
"the background rejection at given signal efficiency, and scans " <<
Endl;
1730 Log() <<
"over the full range of the latter quantity. Three optimisation" <<
Endl;
1731 Log() <<
"methods are optional: Monte Carlo sampling (MC), a Genetics" <<
Endl;
1732 Log() <<
"Algorithm (GA), and Simulated Annealing (SA). GA and SA are" <<
Endl;
1733 Log() <<
"expected to perform best." <<
Endl;
1735 Log() <<
"The difficulty to find the optimal cuts strongly increases with" <<
Endl;
1736 Log() <<
"the dimensionality (number of input variables) of the problem." <<
Endl;
1737 Log() <<
"This behavior is due to the non-uniqueness of the solution space."<<
Endl;
1741 Log() <<
"If the dimensionality exceeds, say, 4 input variables, it is " <<
Endl;
1742 Log() <<
"advisable to scrutinize the separation power of the variables," <<
Endl;
1743 Log() <<
"and to remove the weakest ones. If some among the input variables" <<
Endl;
1744 Log() <<
"can be described by a single cut (e.g., because signal tends to be" <<
Endl;
1745 Log() <<
"larger than background), this can be indicated to MethodCuts via" <<
Endl;
1746 Log() <<
"the \"Fsmart\" options (see option string). Choosing this option" <<
Endl;
1747 Log() <<
"reduces the number of requirements for the variable from 2 (min/max)" <<
Endl;
1748 Log() <<
"to a single one (TMVA finds out whether it is to be interpreted as" <<
Endl;
1749 Log() <<
"min or max)." <<
Endl;
1753 Log() << bold <<
"Monte Carlo sampling:" << resbold <<
Endl;
1755 Log() <<
"Apart form the \"Fsmart\" option for the variables, the only way" <<
Endl;
1756 Log() <<
"to improve the MC sampling is to increase the sampling rate. This" <<
Endl;
1757 Log() <<
"is done via the configuration option \"MC_NRandCuts\". The execution" <<
Endl;
1758 Log() <<
"time scales linearly with the sampling rate." <<
Endl;
1760 Log() << bold <<
"Genetic Algorithm:" << resbold <<
Endl;
1762 Log() <<
"The algorithm terminates if no significant fitness increase has" <<
Endl;
1763 Log() <<
"been achieved within the last \"nsteps\" steps of the calculation." <<
Endl;
1764 Log() <<
"Wiggles in the ROC curve or constant background rejection of 1" <<
Endl;
1765 Log() <<
"indicate that the GA failed to always converge at the true maximum" <<
Endl;
1766 Log() <<
"fitness. In such a case, it is recommended to broaden the search " <<
Endl;
1767 Log() <<
"by increasing the population size (\"popSize\") and to give the GA " <<
Endl;
1768 Log() <<
"more time to find improvements by increasing the number of steps" <<
Endl;
1769 Log() <<
"(\"nsteps\")" <<
Endl;
1770 Log() <<
" -> increase \"popSize\" (at least >10 * number of variables)" <<
Endl;
1771 Log() <<
" -> increase \"nsteps\"" <<
Endl;
1773 Log() << bold <<
"Simulated Annealing (SA) algorithm:" << resbold <<
Endl;
1775 Log() <<
"\"Increasing Adaptive\" approach:" <<
Endl;
1777 Log() <<
"The algorithm seeks local minima and explores their neighborhoods, while" <<
Endl;
1778 Log() <<
"changing the ambient temperature depending on the number of failures" <<
Endl;
1779 Log() <<
"in the previous steps. The performance can be improved by increasing" <<
Endl;
1780 Log() <<
"the number of iteration steps (\"MaxCalls\"), or by adjusting the" <<
Endl;
1781 Log() <<
"minimal temperature (\"MinTemperature\"). Manual adjustments of the" <<
Endl;
1782 Log() <<
"speed of the temperature increase (\"TemperatureScale\" and \"AdaptiveSpeed\")" <<
Endl;
1783 Log() <<
"to individual data sets should also help. Summary:" << brk <<
Endl;
1784 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1785 Log() <<
" -> adjust \"MinTemperature\"" << brk <<
Endl;
1786 Log() <<
" -> adjust \"TemperatureScale\"" << brk <<
Endl;
1787 Log() <<
" -> adjust \"AdaptiveSpeed\"" <<
Endl;
1789 Log() <<
"\"Decreasing Adaptive\" approach:" <<
Endl;
1791 Log() <<
"The algorithm calculates the initial temperature (based on the effect-" <<
Endl;
1792 Log() <<
"iveness of large steps) and the multiplier that ensures to reach the" <<
Endl;
1793 Log() <<
"minimal temperature with the requested number of iteration steps." <<
Endl;
1794 Log() <<
"The performance can be improved by adjusting the minimal temperature" <<
Endl;
1795 Log() <<
" (\"MinTemperature\") and by increasing number of steps (\"MaxCalls\"):" << brk <<
Endl;
1796 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1797 Log() <<
" -> adjust \"MinTemperature\"" <<
Endl;
1799 Log() <<
"Other kernels:" <<
Endl;
1801 Log() <<
"Alternative ways of counting the temperature change are implemented. " <<
Endl;
1802 Log() <<
"Each of them starts with the maximum temperature (\"MaxTemperature\")" <<
Endl;
1803 Log() <<
"and decreases while changing the temperature according to a given" <<
Endl;
1804 Log() <<
"prescription:" << brk <<
Endl;
1805 Log() <<
"CurrentTemperature =" << brk <<
Endl;
1806 Log() <<
" - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" << brk <<
Endl;
1807 Log() <<
" - Log: InitialTemperature / Log(StepNumber+2) * TemperatureScale" << brk <<
Endl;
1808 Log() <<
" - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" << brk <<
Endl;
1809 Log() <<
" - Sin: (Sin(StepNumber / TemperatureScale) + 1) / (StepNumber + 1)*InitialTemperature + Eps" << brk <<
Endl;
1810 Log() <<
" - Geo: CurrentTemperature * TemperatureScale" <<
Endl;
1812 Log() <<
"Their performance can be improved by adjusting initial temperature" <<
Endl;
1813 Log() <<
"(\"InitialTemperature\"), the number of iteration steps (\"MaxCalls\")," <<
Endl;
1814 Log() <<
"and the multiplier that scales the temperature decrease" <<
Endl;
1815 Log() <<
"(\"TemperatureScale\")" << brk <<
Endl;
1816 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1817 Log() <<
" -> adjust \"InitialTemperature\"" << brk <<
Endl;
1818 Log() <<
" -> adjust \"TemperatureScale\"" << brk <<
Endl;
1819 Log() <<
" -> adjust \"KernelTemperature\"" <<
Endl;
std::vector< Double_t > * fRmsS
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
void GetHelpMessage() const
get help message text
void WriteMonitoringHistosToFile(void) const
write histograms and PDFs to file for monitoring purposes
Random number generator class based on M.
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
MsgLogger & Endl(MsgLogger &ml)
Singleton class for Global types used by TMVA.
Base class for TMVA fitters.
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA.
void TestClassification()
nothing to test
Collectable string class.
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
MethodCuts(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="MC:150:10000:")
standard constructor
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
void Clone(Ssiz_t nc)
Make self a distinct copy with capacity of at least tot, where tot cannot be smaller than the current...
Double_t Fill(const std::vector< TMVA::Event *> &events, const std::vector< Int_t > &theVars, Int_t theType=-1)
create the search tree from the event collection using ONLY the variables specified in "theVars" ...
Virtual base Class for all MVA method.
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
std::vector< TH1 * > * fVarHistS
1-D histogram with a float per channel (see TH1 documentation)}
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
Short_t Min(Short_t a, Short_t b)
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
std::vector< PDF * > * fVarPdfS
Double_t GetSumOfWeights(void) const
return the sum of event (node) weights
Volume for BinarySearchTree.
std::vector< TH1 * > * fVarHistS_smooth
const TString & GetInputVar(Int_t i) const
Double_t EstimatorFunction(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA
std::vector< Double_t > * fMeanB
Double_t Run()
estimator function interface for fitting
void SetIPythonInteractive(bool *ExitFromTraining, UInt_t *fIPyMaxIter_, UInt_t *fIPyCurrentIter_)
std::vector< EFitParameters > * fFitParams
void Init(void)
default initialisation called by all constructors
const Event * GetEvent() const
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
BinarySearchTree * fBinaryTreeS
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample ...
virtual void ParseOptions()
options parser
DataSetInfo & DataInfo() const
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
std::vector< TH1 * > * fVarHistB_smooth
Class that contains all the data information.
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
PDF wrapper for histograms; uses user-defined spline interpolation.
std::vector< PDF * > * fVarPdfB
void Train(void)
training method: here the cuts are optimised for the training sample
Float_t Max(Types::ESBType sb, UInt_t var)
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
std::vector< Int_t > * fRangeSign
Float_t Min(Types::ESBType sb, UInt_t var)
const char * GetName() const
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
The TMVA::Interval Class.
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
char * Form(const char *fmt,...)
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
const TString & GetMethodName() const
virtual Double_t Eval(Double_t x) const =0
TSpline * fSplTrainEffBvsS
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
void DeclareOptions()
define the options (their key words) that can be set in the option string.
Fitter using Monte Carlo sampling of parameters.
virtual const char * GetPath() const
Returns the full path of the directory.
Linear interpolation of TGraph.
void SetNormalised(Bool_t norm)
UInt_t GetNVariables() const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
virtual Int_t FindBin(Double_t x)
Find bin number corresponding to abscissa x.
Bool_t IgnoreEventsWithNegWeightsInTraining() const
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
Multivariate optimisation of signal efficiency for given background efficiency, applying rectangular ...
EFitMethodType fFitMethod
std::vector< Double_t > * fMeanS
Bool_t WriteOptionsReference() const
Float_t RMS(Types::ESBType sb, UInt_t var)
Bool_t IsNormalised() const
TH1 * GetHist(const TString &alias) const
void ReadWeightsFromStream(std::istream &i)
read the cuts from stream
static RooMathCoreReg dummy
void SetCurrentType(Types::ETreeType type) const
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
VariableInfo & GetVariableInfo(Int_t i)
void AddPreDefVal(const T &)
virtual Double_t Uniform(Double_t x1=1)
Returns a uniform deviate on the interval (0, x1).
std::vector< Double_t > * fRmsB
const TString & GetOptions() const
Float_t Mean(Types::ESBType sb, UInt_t var)
you should not use this method at all Int_t Int_t z
#define REGISTER_METHOD(CLASS)
for example
virtual void SetPoint(Int_t i, Double_t x, Double_t y)
Set x and y values for point number i.
virtual ~MethodCuts(void)
destructor
IPythonInteractive * fInteractive
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
TString GetMethodTypeName() const
Class that is the base-class for a vector of result.
Short_t Max(Short_t a, Short_t b)
A Graph is a graphics object made of two arrays X and Y with npoints each.
static const Double_t fgMaxAbsCutVal
void ProcessOptions()
process user options.
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Bool_t IsSignal(const Event *ev) const
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
void PrintCuts(Double_t effS) const
print cuts
Types::EAnalysisType GetAnalysisType() const
void Store(TObject *obj, const char *alias=0)
Double_t Sqrt(Double_t x)
Double_t SearchVolume(Volume *, std::vector< const TMVA::BinarySearchTreeNode *> *events=0)
search the whole tree and add up all weights of events that lie within the given volume ...
const TString & GetTestvarName() const
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Double_t GetTrainingEfficiency(const TString &)
Overloaded function to create background efficiency (rejection) versus signal efficiency plot (first ...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Cuts can only handle classification with 2 classes.
std::vector< Interval * > fCutRange
Fitter using a Genetic Algorithm.
void CheckForUnusedOptions() const
checks for unused options in option string
Timing information for training and evaluation of MVA methods.
BinarySearchTree * fBinaryTreeB
A simple Binary search tree including a volume search method.
Fitter using a Simulated Annealing Algorithm.
std::vector< TH1 * > * fVarHistB
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
const char * Data() const