106 std::vector<TTreeFormula*>::iterator formIt = fCatFormulas.begin();
107 std::vector<TTreeFormula*>::iterator lastF = fCatFormulas.end();
108 for(;formIt!=lastF; ++formIt)
delete *formIt;
118 std::vector<IMethod*>::iterator itrMethod = fMethods.begin();
121 for(; itrMethod != fMethods.end(); ++itrMethod ) {
122 if ( !(*itrMethod)->HasAnalysisType(
type, numberClasses, numberTargets) )
144 std::string addedMethodName(
Types::Instance().GetMethodName(theMethod).Data());
146 Log() << kINFO <<
"Adding sub-classifier: " << addedMethodName <<
"::" << theTitle <<
Endl;
148 DataSetInfo& dsi = CreateCategoryDSI(theCut, theVariables, theTitle);
153 if(method==0)
return 0;
181 fMethods.push_back(method);
182 fCategoryCuts.push_back(theCut);
183 fVars.push_back(theVariables);
188 fCategorySpecIdx.push_back(newSpectatorIndex);
205 TString dsiName=theTitle+
"_dsi";
211 fDataSetManager->AddDataSetInfo(*dsi);
214 std::vector<VariableInfo>::iterator itrVarInfo;
226 std::vector<UInt_t> varMap;
230 std::vector<TString>::iterator itrVariables;
234 for (itrVariables =
variables.begin(); itrVariables !=
variables.end(); ++itrVariables) {
239 if((*itrVariables==itrVarInfo->GetLabel()) ) {
243 varMap.push_back(counter);
251 if((*itrVariables==itrVarInfo->GetLabel()) ) {
255 varMap.push_back(counter);
263 Log() << kFATAL <<
"The variable " << itrVariables->Data() <<
" was not found and could not be added " <<
Endl;
269 if (theVariables==
"") {
277 fVarMaps.push_back(varMap);
283 for (
UInt_t i=0; i<nClasses; i++) {
287 dsi->
AddCut(theCut,className);
298 splitOpt +=
":ScaleWithPreselEff";
321 std::vector<VariableInfo>::const_iterator viIt;
326 for (viIt = vars.begin(); viIt != vars.end(); ++viIt)
327 if( viIt->GetExternalLink() == 0 ) {
328 hasAllExternalLinks =
kFALSE;
331 for (viIt = specs.begin(); viIt != specs.end(); ++viIt)
332 if( viIt->GetExternalLink() == 0 ) {
333 hasAllExternalLinks =
kFALSE;
337 if(!hasAllExternalLinks)
return;
345 fCatTree =
new TTree(
Form(
"Circ%s",GetMethodName().Data()),
"Circular Tree for categorization");
346 fCatTree->SetCircular(1);
349 for (viIt = vars.begin(); viIt != vars.end(); ++viIt) {
353 for (viIt = specs.begin(); viIt != specs.end(); ++viIt) {
359 for(
UInt_t cat=0; cat!=fCategoryCuts.size(); ++cat) {
360 fCatFormulas.push_back(
new TTreeFormula(
Form(
"Category_%i",cat), fCategoryCuts[cat].GetTitle(), fCatTree));
375 Log() << kINFO <<
"Train all sub-classifiers for "
379 if (fMethods.empty()) {
380 Log() << kINFO <<
"...nothing found to train" <<
Endl;
384 std::vector<IMethod*>::iterator itrMethod;
387 for (itrMethod = fMethods.begin(); itrMethod != fMethods.end(); ++itrMethod ) {
395 Log() << kWARNING <<
"Method " << mva->
GetMethodTypeName() <<
" is not capable of handling " ;
400 itrMethod = fMethods.erase( itrMethod );
405 Log() << kINFO <<
"Train method: " << mva->
GetMethodName() <<
" for "
408 Log() << kINFO <<
"Training finished" <<
Endl;
413 <<
" not trained (training tree has less entries ["
417 Log() << kERROR <<
" w/o training/test events for that category, I better stop here and let you fix " <<
Endl;
418 Log() << kFATAL <<
"that one first, otherwise things get too messy later ... " <<
Endl;
426 Log() << kINFO <<
"Begin ranking of input variables..." <<
Endl;
427 for (itrMethod = fMethods.begin(); itrMethod != fMethods.end(); ++itrMethod) {
430 const Ranking* ranking = (*itrMethod)->CreateRanking();
434 Log() << kINFO <<
"No variable ranking supplied by classifier: "
451 for (
UInt_t i=0; i<fMethods.size(); i++) {
477 Log() << kINFO <<
"Recreating sub-classifiers from XML-file " <<
Endl;
480 for (
UInt_t i=0; i<nSubMethods; i++) {
486 methodType = fullMethodName(0,fullMethodName.
Index(
"::"));
487 if (methodType.
Contains(
" ")) methodType = methodType(methodType.
Last(
' ')+1,methodType.
Length());
490 titleLength = fullMethodName.
Length()-fullMethodName.
Index(
"::")-2;
491 methodTitle = fullMethodName(fullMethodName.
Index(
"::")+2,titleLength);
494 DataSetInfo& dsi = CreateCategoryDSI(
TCut(theCutString), theVariables, methodTitle);
500 Log() << kFATAL <<
"Could not create sub-method " << method <<
" from XML." <<
Endl;
505 fMethods.push_back(method);
506 fCategoryCuts.push_back(
TCut(theCutString));
507 fVars.push_back(theVariables);
511 UInt_t spectatorIdx = 10000;
516 std::vector<VariableInfo>::iterator itrVarInfo;
517 TString specName=
Form(
"%s_cat%i", GetName(),(
int)fCategorySpecIdx.size()+1);
519 for (itrVarInfo = spectators.begin(); itrVarInfo != spectators.end(); ++itrVarInfo, ++counter) {
520 if((specName==itrVarInfo->GetLabel()) || (specName==itrVarInfo->GetExpression())) {
521 spectatorIdx=counter;
522 fCategorySpecIdx.push_back(spectatorIdx);
530 InitCircularTree(DataInfo());
552 Log() <<
"This method allows to define different categories of events. The" <<
Endl;
553 Log() <<
"categories are defined via cuts on the variables. For each" <<
Endl;
554 Log() <<
"category, a different classifier and set of variables can be" <<
Endl;
555 Log() <<
"specified. The categories which are defined for this method must" <<
Endl;
556 Log() <<
"be disjoint." <<
Endl;
575 if (methodIdx>=fCatFormulas.size()) {
576 Log() << kFATAL <<
"Large method index " << methodIdx <<
", number of category formulas = "
577 << fCatFormulas.size() <<
Endl;
586 if (methodIdx>=fCategorySpecIdx.size()) {
587 Log() << kFATAL <<
"Unknown method index " << methodIdx <<
" maximum allowed index="
588 << fCategorySpecIdx.size() <<
Endl;
590 UInt_t spectatorIdx = fCategorySpecIdx[methodIdx];
592 Bool_t pass = (specVal>0.5);
602 if (fMethods.empty())
return 0;
605 const Event* ev = GetEvent();
608 Int_t suitableCutsN = 0;
610 for (
UInt_t i=0; i<fMethods.size(); ++i) {
611 if (PassesCut(ev, i)) {
617 if (suitableCutsN == 0) {
618 Log() << kWARNING <<
"Event does not lie within the cut of any sub-classifier." <<
Endl;
622 if (suitableCutsN > 1) {
623 Log() << kFATAL <<
"The defined categories are not disjoint." <<
Endl;
629 Double_t mvaValue =
dynamic_cast<MethodBase*
>(fMethods[methodToUse])->GetMvaValue(ev,err,errUpper);
632 Log() << kDEBUG <<
"Event is for method " << methodToUse <<
" spectator is " << ev->
GetSpectator(0) <<
" "
633 << fVarMaps[0][0] <<
" classID " << DataInfo().IsSignal(ev) <<
" value " << mvaValue
634 <<
" type " << Data()->GetCurrentType() <<
Endl;
646 std::vector<Double_t> result;
648 Info(
"GetMVaValues",
"Evaluate MethodCategory for %d events type %d on the dataset %s",
int(lastEvt - firstEvt),
649 (
int)Data()->GetCurrentType(), DataInfo().GetName());
651 if (fMethods.empty())
657 std::vector<std::vector<Double_t>> mvaValues(fMethods.size());
658 for (
UInt_t i = 0; i < fMethods.size(); ++i) {
660 for (
UInt_t iev = firstEvt; iev < lastEvt; ++iev) {
661 data->SetCurrentEvent(iev);
662 const Event *ev = GetEvent(data->GetEvent());
666 mvaValues[i] =
dynamic_cast<MethodBase *
>(fMethods[i])->GetDataMvaValues(data,firstEvt, lastEvt, logProgress);
670 result.resize(lastEvt - firstEvt);
672 for (
UInt_t iev = firstEvt; iev < lastEvt; ++iev)
674 data->SetCurrentEvent(iev);
676 const Event *ev = GetEvent(data->GetEvent());
679 Int_t suitableCutsN = 0;
681 for (
UInt_t i = 0; i < fMethods.size(); ++i) {
682 if (PassesCut(ev, i)) {
688 if (suitableCutsN == 0) {
689 Log() << kWARNING <<
"Event does not lie within the cut of any sub-classifier." <<
Endl;
693 if (suitableCutsN > 1) {
694 Log() << kFATAL <<
"The defined categories are not disjoint." <<
Endl;
699 result[iev - firstEvt] = mvaValues[methodToUse][iev - firstEvt];
712 if (fMethods.empty())
716 const Event *ev = GetEvent();
719 Int_t suitableCutsN = 0;
721 for (
UInt_t i = 0; i < fMethods.size(); ++i) {
722 if (PassesCut(ev, i)) {
728 if (suitableCutsN == 0) {
729 Log() << kWARNING <<
"Event does not lie within the cut of any sub-classifier." <<
Endl;
733 if (suitableCutsN > 1) {
734 Log() << kFATAL <<
"The defined categories are not disjoint." <<
Endl;
739 Log() << kFATAL <<
"method not found in Category Regression method" <<
Endl;
757 const Event* ev = GetEvent();
760 Int_t suitableCutsN = 0;
762 for (
UInt_t i=0; i<fMethods.size(); ++i) {
763 if (PassesCut(ev, i)) {
769 if (suitableCutsN == 0) {
770 Log() << kWARNING <<
"Event does not lie within the cut of any sub-classifier." <<
Endl;
774 if (suitableCutsN > 1) {
775 Log() << kFATAL <<
"The defined categories are not disjoint." <<
Endl;
780 Log() << kFATAL <<
"method not found in Category Regression method" <<
Endl;
#define MinNoTrainingEvents
#define REGISTER_METHOD(CLASS)
for example
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
char * Form(const char *fmt,...)
A specialized string object used for TTree selections.
TDirectory::TContext keeps track of and restores the current directory.
Describe directory structure in memory.
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using a path.
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton; creates the instance if needed
virtual void ParseOptions()
options parser
Class that contains all the data information.
const TString GetWeightExpression(Int_t i) const
std::vector< VariableInfo > & GetVariableInfos()
void SetSplitOptions(const TString &so)
ClassInfo * AddClass(const TString &className)
const TString & GetNormalization() const
std::vector< VariableInfo > & GetSpectatorInfos()
TDirectory * GetRootDir() const
void SetNormalization(const TString &norm)
UInt_t GetNClasses() const
const TString & GetSplitOptions() const
UInt_t GetNTargets() const
VariableInfo & AddTarget(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis
VariableInfo & AddSpectator(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, char type='F', Bool_t normalized=kTRUE, void *external=0)
add a spectator (can be a complex expression) to the set of spectator variables used in the MV analys...
ClassInfo * GetClassInfo(Int_t clNum) const
const TCut & GetCut(Int_t i) const
void SetCut(const TCut &cut, const TString &className)
set the cut for the classes
VariableInfo & AddVariable(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0, char varType='F', Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis
std::vector< VariableInfo > & GetTargetInfos()
void SetRootDir(TDirectory *d)
void SetWeightExpression(const TString &exp, const TString &className="")
set the weight expressions for the classes if class name is specified, set only for this class if cla...
void AddCut(const TCut &cut, const TString &className)
set the cut for the classes
Long64_t GetNTrainingEvents() const
void SetVariableArrangement(std::vector< UInt_t > *const m) const
set the variable arrangement
Float_t GetSpectator(UInt_t ivar) const
return spectator content
Interface for all concrete MVA method implementations.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)=0
Virtual base class for all MVA methods.
virtual const std::vector< Float_t > & GetRegressionValues()
const std::vector< Float_t > & GetRegressionValues(const TMVA::Event *const ev)
void SetSilentFile(Bool_t status)
void SetWeightFileDir(TString fileDir)
set directory of weight file
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables,...
TString GetMethodTypeName() const
void DisableWriting(Bool_t setter)
const char * GetName() const
virtual const std::vector< Float_t > & GetMulticlassValues()
void SetupMethod()
setup of methods
virtual void SetAnalysisType(Types::EAnalysisType type)
const TString & GetMethodName() const
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
DataSetInfo & DataInfo() const
void SetFile(TFile *file)
void ReadStateFromXML(void *parent)
friend class MethodCategory
void SetMethodBaseDir(TDirectory *methodDir)
void SetModelPersistence(Bool_t status)
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Class for categorizing the phase space.
void InitCircularTree(const DataSetInfo &dsi)
initialize the circular tree
void GetHelpMessage() const
Get help message text.
void Init()
initialize the method
Bool_t PassesCut(const Event *ev, UInt_t methodIdx)
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
check whether method category has analysis type the method type has to be the same for all sub-method...
void ProcessOptions()
process user options
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns the mva value of the right sub-classifier
virtual const std::vector< Float_t > & GetMulticlassValues()
returns the multi-class mva values of the right sub-classifier
TMVA::DataSetInfo & CreateCategoryDSI(const TCut &, const TString &, const TString &)
create a DataSetInfo object for a sub-classifier
void DeclareOptions()
options for this method
void AddWeightsXMLTo(void *parent) const
create XML description of Category classifier
const Ranking * CreateRanking()
no ranking
virtual ~MethodCategory(void)
destructor
virtual const std::vector< Float_t > & GetRegressionValues()
returns the mva value of the right sub-classifier
TMVA::IMethod * AddMethod(const TCut &, const TString &theVariables, Types::EMVA theMethod, const TString &theTitle, const TString &theOptions)
adds sub-classifier for a category
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
returns the mva values of the right sub-classifier
void ReadWeightsFromXML(void *wghtnode)
read weights of sub-classifiers of MethodCategory from xml weight file
void Train(void)
train all sub-classifiers
Virtual base class for combining several TMVA methods.
Ranking for variables in method (implementation)
virtual void Print() const
get maximum length of variable names
Singleton class for Global types used by TMVA.
static Types & Instance()
returns the single instance of "Types" if it exists already, or creates it (Singleton)
Class for type info of MVA input variable.
const TString & GetExpression() const
void * GetExternalLink() const
virtual const char * GetTitle() const
Returns title of object.
virtual const char * GetName() const
Returns name of object.
const char * Data() const
Ssiz_t Last(char c) const
Find last occurrence of a character c.
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
A TTree represents a columnar dataset.
create variable transformations
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
MsgLogger & Endl(MsgLogger &ml)