138 const
Double_t TMVA::MethodCuts::fgMaxAbsCutVal = 1.0e30;
148 fFitMethod ( kUseGeneticAlgorithm ),
149 fEffMethod ( kUseEventSelection ),
174 fVarHistS_smooth( 0 ),
175 fVarHistB_smooth( 0 ),
186 const TString& theWeightFile) :
188 fFitMethod ( kUseGeneticAlgorithm ),
189 fEffMethod ( kUseEventSelection ),
214 fVarHistS_smooth( 0 ),
215 fVarHistB_smooth( 0 ),
334 DeclareOptionRef(
fFitMethodS =
"GA",
"FitMethod",
"Minimisation Method (GA, SA, and MC are the primary methods to be used; the others have been introduced for testing purposes and are depreciated)");
378 Log() <<
kWARNING <<
"Normalisation of the input variables for cut optimisation is not" <<
Endl;
379 Log() <<
kWARNING <<
"supported because this provides intransparent cut values, and no" <<
Endl;
380 Log() <<
kWARNING <<
"improvement in the performance of the algorithm." <<
Endl;
381 Log() <<
kWARNING <<
"Please remove \"Normalise\" option from booking option string" <<
Endl;
382 Log() <<
kWARNING <<
"==> Will reset normalisation flag to \"False\"" <<
Endl;
387 Log() <<
kFATAL <<
"Mechanism to ignore events with negative weights in training not yet available for method: " 389 <<
" --> Please remove \"IgnoreNegWeightsInTraining\" option from booking string." 399 Log() <<
kWARNING <<
"poor performance of MINUIT in MethodCuts; preferred fit method: GA" <<
Endl;
409 Log() <<
kINFO <<
Form(
"Use optimization method: \"%s\"",
414 Log() <<
kINFO <<
Form(
"Use efficiency computation method: \"%s\"",
431 <<
"\' for fit parameter option " <<
Form(
"VarProp[%i]",ivar) <<
Endl;
433 (*fFitParams)[ivar] = theFitP;
437 <<
"\" cuts for variable: " <<
"'" << (*fInputVars)[ivar] <<
"'" <<
Endl;
451 Log() <<
kFATAL <<
"<Eval_Cuts> fCutMin/Max have zero pointer. " 452 <<
"Did you book Cuts ?" <<
Endl;
461 if (ibin < 0 ) ibin = 0;
469 return passed ? 1. : 0. ;
479 std::vector<Double_t> cutsMin;
480 std::vector<Double_t> cutsMax;
486 std::vector<TString>* varVec = 0;
489 varVec =
new std::vector<TString>;
490 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
491 varVec->push_back(
DataInfo().GetVariableInfo(ivar).GetLabel() );
500 varVec =
new std::vector<TString>;
501 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
502 varVec->push_back(
DataInfo().GetVariableInfo(ivar).GetLabel() +
" [transformed]" );
507 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
508 if ((
UInt_t)(*varVec)[ivar].Length() > maxL) maxL = (*varVec)[ivar].Length();
510 UInt_t maxLine = 20+maxL+16;
512 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
514 Log() <<
kHEADER <<
"Cut values for requested signal efficiency: " << trueEffS <<
Endl;
517 Log() <<
kINFO <<
"Transformation applied to input variables : \"" 522 <<
" transformations applied in transformation chain; cuts applied on transformed quantities ] " <<
Endl;
525 Log() <<
kINFO <<
"Transformation applied to input variables : None" <<
Endl;
527 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
529 for (
UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
531 <<
"Cut[" << std::setw(2) << ivar <<
"]: " 532 << std::setw(10) << cutsMin[ivar]
534 << std::setw(maxL) << (*varVec)[ivar]
536 << std::setw(10) << cutsMax[ivar] <<
Endl;
538 for (
UInt_t i=0; i<maxLine; i++)
Log() <<
"-";
550 std::vector<Double_t> cMin(
GetNvar() );
551 std::vector<Double_t> cMax(
GetNvar() );
554 cutMin[ivar] = cMin[ivar];
555 cutMax[ivar] = cMax[ivar];
564 std::vector<Double_t>& cutMin,
565 std::vector<Double_t>& cutMax )
const 574 if (ibin < 0 ) ibin = 0;
580 cutMin.push_back(
fCutMin[ivar][ibin] );
581 cutMax.push_back(
fCutMax[ivar][ibin] );
632 std::vector<TH1F*> signalDist, bkgDist;
650 std::vector<Interval*> ranges;
707 for (
UInt_t ivar=0; ivar<ranges.size(); ivar++)
delete ranges[ivar];
718 Int_t nsamples =
Int_t(0.5*nevents*(nevents - 1));
723 for (
Int_t ievt1=0; ievt1<nevents; ievt1++) {
724 for (
Int_t ievt2=ievt1+1; ievt2<nevents; ievt2++) {
739 Int_t nsamples = 200000;
741 DeclareOptionRef( nsamples,
"SampleSize",
"Number of Monte-Carlo-Event samples" );
742 DeclareOptionRef( seed,
"Seed",
"Seed for the random generator (0 takes random seeds)" );
755 Log() <<
kINFO <<
"Running Monte-Carlo-Event sampling over " << nsamples <<
" events" <<
Endl;
756 std::vector<Double_t> pars( 2*
GetNvar() );
758 for (
Int_t itoy=0; itoy<nsamples; itoy++) {
777 evt1 = ev1->GetValue( ivar );
783 if (nbreak++ > 10000)
Log() <<
kFATAL <<
"<MCEvents>: could not find signal events" 784 <<
" after 10000 trials - do you have signal events in your sample ?" 790 if (evt1 > evt2) {
Double_t z = evt1; evt1 = evt2; evt2 =
z; }
792 pars[2*ivar+1] = evt2 - evt1;
847 if (!
DataInfo().IsSignal(ev1))
return -1;
850 if (!
DataInfo().IsSignal(ev2))
return -1;
856 for (
Int_t ivar=0; ivar<nvar; ivar++) {
862 std::vector<Double_t> pars;
863 for (
Int_t ivar=0; ivar<nvar; ivar++) {
866 if (evt1[ivar] < evt2[ivar]) {
875 pars.push_back( cutMin );
876 pars.push_back( cutMax - cutMin );
936 Double_t average = 0.5*(effBH_left + effBH_right);
937 if (effBH < effB) average = effBH;
941 eta = ( -
TMath::Abs(effBH-average) + (1.0 - (effBH - effB))) / (1.0 + effS);
948 if (effBH < 0 || effBH > effB) {
970 penalty+=4.*diff*diff;
973 if (effS<1.
e-4)
return 10.0+penalty;
974 else return 10.*(1.-10.*effS);
987 cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] : pars[ipar] - pars[ipar+1];
988 cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] + pars[ipar+1] : pars[ipar];
998 if (ibin < 1 || ibin >
fNbins)
Log() <<
kFATAL <<
"::MatchCutsToPars: bin error: " 1004 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
1005 cutMin[ivar] = cutMinAll[ivar][ibin-1];
1006 cutMax[ivar] = cutMaxAll[ivar][ibin-1];
1021 Int_t ipar = 2*ivar;
1022 pars[ipar] = ((*fRangeSign)[ivar] > 0) ? cutMin[ivar] : cutMax[ivar];
1023 pars[ipar+1] = cutMax[ivar] - cutMin[ivar];
1037 effS *= (*fVarPdfS)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
1038 effB *= (*fVarPdfB)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
1044 if( !
fNegEffWarning )
Log() <<
kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1049 if( !
fNegEffWarning )
Log() <<
kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1077 if (nTotS == 0 && nTotB == 0) {
1078 Log() <<
kFATAL <<
"<GetEffsfromSelection> fatal error in zero total number of events:" 1079 <<
" nTotS, nTotB: " << nTotS <<
" " << nTotB <<
" ***" <<
Endl;
1086 Log() <<
kWARNING <<
"<ComputeEstimator> zero number of signal events" <<
Endl;
1088 else if (nTotB == 0) {
1091 Log() <<
kWARNING <<
"<ComputeEstimator> zero number of background events" <<
Endl;
1101 if( !
fNegEffWarning )
Log() <<
kWARNING <<
"Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1106 if( !
fNegEffWarning )
Log() <<
kWARNING <<
"Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." <<
Endl;
1132 if( val > minVal ) minVal = val;
1133 if( val < maxVal ) maxVal = val;
1139 TString histTitle = (*fInputVars)[ivar] +
" signal training";
1140 TString histName = (*fInputVars)[ivar] +
"_sig";
1151 (*fVarHistS)[ivar] =
new TH1F(histName.
Data(), histTitle.
Data(),
fNbins, minVal, maxVal );
1154 histTitle = (*fInputVars)[ivar] +
" background training";
1155 histName = (*fInputVars)[ivar] +
"_bgd";
1166 (*fVarHistB)[ivar] =
new TH1F(histName.
Data(), histTitle.Data(),
fNbins, minVal, maxVal );
1172 (*fVarHistS)[ivar]->Fill( val );
1174 (*fVarHistB)[ivar]->Fill( val );
1181 (*fVarHistS_smooth)[ivar] = (
TH1F*)(*
fVarHistS)[ivar]->Clone();
1182 histTitle = (*fInputVars)[ivar] +
" signal training smoothed ";
1183 histTitle += nsmooth;
1184 histTitle +=
" times";
1185 histName = (*fInputVars)[ivar] +
"_sig_smooth";
1186 (*fVarHistS_smooth)[ivar]->SetName(histName);
1187 (*fVarHistS_smooth)[ivar]->SetTitle(histTitle);
1190 (*fVarHistS_smooth)[ivar]->Smooth(nsmooth);
1205 (*fVarHistB_smooth)[ivar] = (
TH1F*)(*fVarHistB)[ivar]->
Clone();
1206 histTitle = (*fInputVars)[ivar]+
" background training smoothed ";
1207 histTitle += nsmooth;
1208 histTitle +=
" times";
1209 histName = (*fInputVars)[ivar]+
"_bgd_smooth";
1210 (*fVarHistB_smooth)[ivar]->SetName(histName);
1211 (*fVarHistB_smooth)[ivar]->SetTitle(histTitle);
1214 (*fVarHistB_smooth)[ivar]->Smooth(nsmooth);
1231 istr >> dummy >>
dummy;
1236 istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >> dummyInt >>
dummy ;
1240 Log() <<
kFATAL <<
"<ReadWeightsFromStream> fatal error: mismatch " 1247 Log() <<
kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1250 Log() <<
kWARNING <<
"Read cuts optimised using sample of MC events" <<
Endl;
1253 Log() <<
kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1256 Log() <<
kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1259 Log() <<
kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1264 Log() <<
kINFO <<
"in " << fNbins <<
" signal efficiency bins and for " <<
GetNvar() <<
" variables" <<
Endl;
1268 istr.getline(buffer,200);
1269 istr.getline(buffer,200);
1275 TString(
GetName()) +
" efficiency of B vs S", fNbins, 0.0, 1.0 );
1279 istr >> tmpbin >> tmpeffS >> tmpeffB;
1298 std::vector<Double_t> cutsMin;
1299 std::vector<Double_t> cutsMax;
1305 gTools().
AddComment( wght,
Form(
"Below are the optimised cuts for %i variables: Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0] ... cutMin[ivar=n-1] cutMax[ivar=n-1]",
GetNvar() ) );
1345 Int_t tmpEffMethod, tmpFitMethod;
1346 gTools().
ReadAttr( wghtnode,
"OptimisationMethod", tmpEffMethod );
1355 Log() <<
kINFO <<
"Read cuts optimised using sample of MC events" <<
Endl;
1358 Log() <<
kINFO <<
"Read cuts optimised using sample of MC-Event events" <<
Endl;
1361 Log() <<
kINFO <<
"Read cuts optimised using Genetic Algorithm" <<
Endl;
1364 Log() <<
kINFO <<
"Read cuts optimised using Simulated Annealing algorithm" <<
Endl;
1367 Log() <<
kINFO <<
"Read cuts optimised using Full Event Scan" <<
Endl;
1402 if (tmpbin-1 >=
fNbins || tmpbin-1 < 0) {
1428 (*fVarHistS)[ivar]->Write();
1429 (*fVarHistB)[ivar]->Write();
1430 (*fVarHistS_smooth)[ivar]->Write();
1431 (*fVarHistB_smooth)[ivar]->Write();
1432 (*fVarPdfS)[ivar]->GetPDFHist()->Write();
1433 (*fVarPdfB)[ivar]->GetPDFHist()->Write();
1455 Log() <<
kFATAL <<
"<GetTrainingEfficiency> wrong number of arguments" 1456 <<
" in string: " << theString
1457 <<
" | required format, e.g., Efficiency:0.05" <<
Endl;
1470 if (results->
GetHist(
"EFF_BVSS_TR")==0) {
1490 results->
Store(eff_bvss_tr,
"EFF_BVSS_TR");
1491 results->
Store(rej_bvss_tr,
"REJ_BVSS_TR");
1498 Int_t nFailedBins=0;
1501 tmpCutMin[ivar] =
fCutMin[ivar][bini-1];
1502 tmpCutMax[ivar] =
fCutMax[ivar][bini-1];
1509 if (effBin != bini){
1510 Log()<<
kVERBOSE <<
"unable to fill efficiency bin " << bini<<
" " << effBin <<
Endl;
1519 if (nFailedBins>0)
Log()<<
kWARNING <<
" unable to fill "<< nFailedBins <<
" efficiency bins " <<
Endl;
1521 delete [] tmpCutMin;
1522 delete [] tmpCutMax;
1532 Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
1533 Int_t nbins_ = 1000;
1536 for (
Int_t bini=1; bini<=nbins_; bini++) {
1538 effS = (bini - 0.5)/
Float_t(nbins_);
1542 if ((effB - effBref)*(effB_ - effBref) < 0)
break;
1547 return 0.5*(effS + effS_);
1572 Log() <<
kFATAL <<
"<GetEfficiency> wrong number of arguments" 1573 <<
" in string: " << theString
1574 <<
" | required format, e.g., Efficiency:0.05, or empty string" <<
Endl;
1589 if (results->
GetHist(
"MVA_EFF_BvsS")==0) {
1613 results->
Store(eff_BvsS,
"MVA_EFF_BvsS");
1614 results->
Store(rej_BvsS);
1623 results->
Store(eff_s,
"MVA_S");
1624 results->
Store(eff_b,
"MVA_B");
1636 tmpCutMin[ivar] =
fCutMin[ivar][bini-1];
1637 tmpCutMax[ivar] =
fCutMax[ivar][bini-1];
1642 tmpBvsS->
SetPoint(bini, effS, effB);
1649 delete [] tmpCutMin;
1650 delete [] tmpCutMax;
1666 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
1667 Int_t nbins_ = 1000;
1673 for (
Int_t bini=1; bini<=nbins_; bini++) {
1676 effS = (bini - 0.5)/
Float_t(nbins_);
1678 integral += (1.0 - effB);
1687 for (
Int_t bini=1; bini<=nbins_; bini++) {
1689 effS = (bini - 0.5)/
Float_t(nbins_);
1693 if ((effB - effBref)*(effB_ - effBref) < 0)
break;
1698 effS = 0.5*(effS + effS_);
1715 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
1716 fout <<
"};" << std::endl;
1734 Log() <<
"The optimisation of rectangular cuts performed by TMVA maximises " <<
Endl;
1735 Log() <<
"the background rejection at given signal efficiency, and scans " <<
Endl;
1736 Log() <<
"over the full range of the latter quantity. Three optimisation" <<
Endl;
1737 Log() <<
"methods are optional: Monte Carlo sampling (MC), a Genetics" <<
Endl;
1738 Log() <<
"Algorithm (GA), and Simulated Annealing (SA). GA and SA are" <<
Endl;
1739 Log() <<
"expected to perform best." <<
Endl;
1741 Log() <<
"The difficulty to find the optimal cuts strongly increases with" <<
Endl;
1742 Log() <<
"the dimensionality (number of input variables) of the problem." <<
Endl;
1743 Log() <<
"This behavior is due to the non-uniqueness of the solution space."<<
Endl;
1747 Log() <<
"If the dimensionality exceeds, say, 4 input variables, it is " <<
Endl;
1748 Log() <<
"advisable to scrutinize the separation power of the variables," <<
Endl;
1749 Log() <<
"and to remove the weakest ones. If some among the input variables" <<
Endl;
1750 Log() <<
"can be described by a single cut (e.g., because signal tends to be" <<
Endl;
1751 Log() <<
"larger than background), this can be indicated to MethodCuts via" <<
Endl;
1752 Log() <<
"the \"Fsmart\" options (see option string). Choosing this option" <<
Endl;
1753 Log() <<
"reduces the number of requirements for the variable from 2 (min/max)" <<
Endl;
1754 Log() <<
"to a single one (TMVA finds out whether it is to be interpreted as" <<
Endl;
1755 Log() <<
"min or max)." <<
Endl;
1759 Log() << bold <<
"Monte Carlo sampling:" << resbold <<
Endl;
1761 Log() <<
"Apart form the \"Fsmart\" option for the variables, the only way" <<
Endl;
1762 Log() <<
"to improve the MC sampling is to increase the sampling rate. This" <<
Endl;
1763 Log() <<
"is done via the configuration option \"MC_NRandCuts\". The execution" <<
Endl;
1764 Log() <<
"time scales linearly with the sampling rate." <<
Endl;
1766 Log() << bold <<
"Genetic Algorithm:" << resbold <<
Endl;
1768 Log() <<
"The algorithm terminates if no significant fitness increase has" <<
Endl;
1769 Log() <<
"been achieved within the last \"nsteps\" steps of the calculation." <<
Endl;
1770 Log() <<
"Wiggles in the ROC curve or constant background rejection of 1" <<
Endl;
1771 Log() <<
"indicate that the GA failed to always converge at the true maximum" <<
Endl;
1772 Log() <<
"fitness. In such a case, it is recommended to broaden the search " <<
Endl;
1773 Log() <<
"by increasing the population size (\"popSize\") and to give the GA " <<
Endl;
1774 Log() <<
"more time to find improvements by increasing the number of steps" <<
Endl;
1775 Log() <<
"(\"nsteps\")" <<
Endl;
1776 Log() <<
" -> increase \"popSize\" (at least >10 * number of variables)" <<
Endl;
1777 Log() <<
" -> increase \"nsteps\"" <<
Endl;
1779 Log() << bold <<
"Simulated Annealing (SA) algorithm:" << resbold <<
Endl;
1781 Log() <<
"\"Increasing Adaptive\" approach:" <<
Endl;
1783 Log() <<
"The algorithm seeks local minima and explores their neighborhood, while" <<
Endl;
1784 Log() <<
"changing the ambient temperature depending on the number of failures" <<
Endl;
1785 Log() <<
"in the previous steps. The performance can be improved by increasing" <<
Endl;
1786 Log() <<
"the number of iteration steps (\"MaxCalls\"), or by adjusting the" <<
Endl;
1787 Log() <<
"minimal temperature (\"MinTemperature\"). Manual adjustments of the" <<
Endl;
1788 Log() <<
"speed of the temperature increase (\"TemperatureScale\" and \"AdaptiveSpeed\")" <<
Endl;
1789 Log() <<
"to individual data sets should also help. Summary:" << brk <<
Endl;
1790 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1791 Log() <<
" -> adjust \"MinTemperature\"" << brk <<
Endl;
1792 Log() <<
" -> adjust \"TemperatureScale\"" << brk <<
Endl;
1793 Log() <<
" -> adjust \"AdaptiveSpeed\"" <<
Endl;
1795 Log() <<
"\"Decreasing Adaptive\" approach:" <<
Endl;
1797 Log() <<
"The algorithm calculates the initial temperature (based on the effect-" <<
Endl;
1798 Log() <<
"iveness of large steps) and the multiplier that ensures to reach the" <<
Endl;
1799 Log() <<
"minimal temperature with the requested number of iteration steps." <<
Endl;
1800 Log() <<
"The performance can be improved by adjusting the minimal temperature" <<
Endl;
1801 Log() <<
" (\"MinTemperature\") and by increasing number of steps (\"MaxCalls\"):" << brk <<
Endl;
1802 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1803 Log() <<
" -> adjust \"MinTemperature\"" <<
Endl;
1805 Log() <<
"Other kernels:" <<
Endl;
1807 Log() <<
"Alternative ways of counting the temperature change are implemented. " <<
Endl;
1808 Log() <<
"Each of them starts with the maximum temperature (\"MaxTemperature\")" <<
Endl;
1809 Log() <<
"and descreases while changing the temperature according to a given" <<
Endl;
1810 Log() <<
"prescription:" << brk <<
Endl;
1811 Log() <<
"CurrentTemperature =" << brk <<
Endl;
1812 Log() <<
" - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" << brk <<
Endl;
1813 Log() <<
" - Log: InitialTemperature / Log(StepNumber+2) * TemperatureScale" << brk <<
Endl;
1814 Log() <<
" - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" << brk <<
Endl;
1815 Log() <<
" - Sin: (Sin(StepNumber / TemperatureScale) + 1) / (StepNumber + 1)*InitialTemperature + Eps" << brk <<
Endl;
1816 Log() <<
" - Geo: CurrentTemperature * TemperatureScale" <<
Endl;
1818 Log() <<
"Their performance can be improved by adjusting initial temperature" <<
Endl;
1819 Log() <<
"(\"InitialTemperature\"), the number of iteration steps (\"MaxCalls\")," <<
Endl;
1820 Log() <<
"and the multiplier that scales the termperature descrease" <<
Endl;
1821 Log() <<
"(\"TemperatureScale\")" << brk <<
Endl;
1822 Log() <<
" -> increase \"MaxCalls\"" << brk <<
Endl;
1823 Log() <<
" -> adjust \"InitialTemperature\"" << brk <<
Endl;
1824 Log() <<
" -> adjust \"TemperatureScale\"" << brk <<
Endl;
1825 Log() <<
" -> adjust \"KernelTemperature\"" <<
Endl;
std::vector< Double_t > * fRmsS
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
cut evaluation: returns 1.0 if event passed, 0.0 otherwise
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
MsgLogger & Endl(MsgLogger &ml)
TH1 * GetHist(const TString &alias) const
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
#define REGISTER_METHOD(CLASS)
for example
Double_t ComputeEstimator(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA; there are two requirements: 1) the signal efficiency m...
void TestClassification()
nothing to test
Collectable string class.
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
MethodCuts(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="MC:150:10000:")
standard constructor
const char * GetName() const
Bool_t IgnoreEventsWithNegWeightsInTraining() const
void CheckForUnusedOptions() const
checks for unused options in option string
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
void MatchParsToCuts(const std::vector< Double_t > &, Double_t *, Double_t *)
translates parameters into cuts
void Clone(Ssiz_t nc)
Make self a distinct copy with capacity of at least tot, where tot cannot be smaller than the current...
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Bool_t IsNormalised() const
std::vector< TH1 * > * fVarHistS
1-D histogram with a float per channel (see TH1 documentation)
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Short_t Min(Short_t a, Short_t b)
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
std::vector< PDF * > * fVarPdfS
const TString & GetInputVar(Int_t i) const
virtual const char * GetPath() const
Returns the full path of the directory.
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
const TString & GetMethodName() const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
std::vector< TH1 * > * fVarHistS_smooth
const char * Data() const
Types::EAnalysisType GetAnalysisType() const
Double_t EstimatorFunction(std::vector< Double_t > &)
returns estimator for "cut fitness" used by GA
std::vector< Double_t > * fMeanB
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Double_t Run()
estimator function interface for fitting
void SetIPythonInteractive(bool *ExitFromTraining, UInt_t *fIPyMaxIter_, UInt_t *fIPyCurrentIter_)
Bool_t IsSignal(const Event *ev) const
std::vector< EFitParameters > * fFitParams
void Init(void)
default initialisation called by all constructors
void CreateVariablePDFs(void)
for PDF method: create efficiency reference histograms and PDFs
BinarySearchTree * fBinaryTreeS
Double_t GetCuts(Double_t effS, std::vector< Double_t > &cutMin, std::vector< Double_t > &cutMax) const
retrieve cut values for given signal efficiency
void GetEffsfromSelection(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from event counting for given cut sample ...
virtual void ParseOptions()
options parser
std::vector< TH1 * > * fVarHistB_smooth
void MatchCutsToPars(std::vector< Double_t > &, Double_t *, Double_t *)
translates cuts into parameters
Double_t SearchVolume(Volume *, std::vector< const TMVA::BinarySearchTreeNode * > *events=0)
search the whole tree and add up all weights of events that lie within the given volume ...
Double_t GetSumOfWeights(void) const
return the sum of event (node) weights
std::vector< PDF * > * fVarPdfB
UInt_t GetNVariables() const
void Train(void)
training method: here the cuts are optimised for the training sample
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Float_t Max(Types::ESBType sb, UInt_t var)
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
std::vector< Int_t > * fRangeSign
Float_t Min(Types::ESBType sb, UInt_t var)
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
void PrintCuts(Double_t effS) const
print cuts
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
const Event * GetEvent() const
char * Form(const char *fmt,...)
void GetEffsfromPDFs(Double_t *cutMin, Double_t *cutMax, Double_t &effS, Double_t &effB)
compute signal and background efficiencies from PDFs for given cut sample
TSpline * fSplTrainEffBvsS
void DeclareOptions()
define the options (their key words) that can be set in the option string; known options: Method <strin...
void SetNormalised(Bool_t norm)
virtual Int_t FindBin(Double_t x)
Find bin number corresponding to abscissa x.
void SetCurrentType(Types::ETreeType type) const
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e. the dataset) transformed using the classifier's specific variable transformation
virtual Int_t GetSize() const
EFitMethodType fFitMethod
std::vector< Double_t > * fMeanS
void WriteMonitoringHistosToFile(void) const
write histograms and PDFs to file for monitoring purposes
Float_t RMS(Types::ESBType sb, UInt_t var)
void ReadWeightsFromStream(std::istream &i)
read the cuts from stream
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
static RooMathCoreReg dummy
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &)
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
DataSetInfo & DataInfo() const
VariableInfo & GetVariableInfo(Int_t i)
void AddPreDefVal(const T &)
virtual Double_t Uniform(Double_t x1=1)
Returns a uniform deviate on the interval (0, x1).
std::vector< Double_t > * fRmsB
void GetHelpMessage() const
get help message text
Float_t Mean(Types::ESBType sb, UInt_t var)
you should not use this method at all Int_t Int_t z
virtual Double_t Eval(Double_t x) const =0
Abstract ClassifierFactory template that handles arbitrary types.
virtual void SetPoint(Int_t i, Double_t x, Double_t y)
Set x and y values for point number i.
virtual ~MethodCuts(void)
destructor
IPythonInteractive * fInteractive
Double_t Fill(const std::vector< TMVA::Event * > &events, const std::vector< Int_t > &theVars, Int_t theType=-1)
create the search tree from the event collection using ONLY the variables specified in "theVars" ...
Short_t Max(Short_t a, Short_t b)
const TString & GetOptions() const
A Graph is a graphics object made of two arrays X and Y with npoints each.
static const Double_t fgMaxAbsCutVal
void ProcessOptions()
process user options; sanity check: do not allow the input variables to be normalised, because this only creates problems when interpreting the cuts
Bool_t WriteOptionsReference() const
void Store(TObject *obj, const char *alias=0)
Double_t Sqrt(Double_t x)
TString GetMethodTypeName() const
Double_t GetTrainingEfficiency(const TString &)
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Cuts can only handle classification with 2 classes.
std::vector< Interval * > fCutRange
BinarySearchTree * fBinaryTreeB
std::vector< TH1 * > * fVarHistB
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
const TString & GetTestvarName() const