38 fDataSetInfo(dl->GetDataSetInfo()),
40 fEvents (fDataSetInfo.
GetDataSet()->GetEventCollection())
75 Log() <<
kINFO <<
"Number of variables before transformation: " << nvars <<
Endl;
83 Log() <<
kINFO <<
"Selecting variables whose variance is above threshold value = " << threshold <<
Endl;
86 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
87 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Selected Variables";
88 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
89 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
90 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
91 Double_t variance = vars[ivar].GetVariance();
92 if (variance > threshold)
94 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << vars[ivar].GetExpression();
95 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
96 transformedLoader->
AddVariable(vars[ivar].GetExpression(), vars[ivar].GetVarType());
100 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
106 return transformedLoader;
122 if (x < vars[ivar].GetMin()) vars[ivar].SetMin(x);
123 if (x > vars[ivar].GetMax()) vars[ivar].SetMax(x);
126 if (x < tars[ivar-nvars].GetMin()) tars[ivar-nvars].SetMin(x);
127 if (x > tars[ivar-nvars].GetMax()) tars[ivar-nvars].SetMax(x);
144 UInt_t nevts = events.size();
147 TVectorD x0( nvars+ntgts ); x0 *= 0;
148 TVectorD v0( nvars+ntgts ); v0 *= 0;
151 for (
UInt_t ievt=0; ievt<nevts; ievt++) {
152 const Event* ev = events[ievt];
155 sumOfWeights += weight;
156 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
159 vars[ivar].SetMin(x);
160 vars[ivar].SetMax(x);
165 x0(ivar) += x*weight;
166 x2(ivar) += x*x*weight;
168 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
171 tars[itgt].SetMin(x);
172 tars[itgt].SetMax(x);
177 x0(nvars+itgt) += x*weight;
178 x2(nvars+itgt) += x*x*weight;
182 if (sumOfWeights <= 0) {
183 Log() <<
kFATAL <<
" the sum of event weights calcualted for your input is == 0" 184 <<
" or exactly: " << sumOfWeights <<
" there is obviously some problem..."<<
Endl;
188 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
189 Double_t mean = x0(ivar)/sumOfWeights;
191 vars[ivar].SetMean( mean );
192 if (
x2(ivar)/sumOfWeights - mean*mean < 0) {
193 Log() <<
kFATAL <<
" the RMS of your input variable " << ivar
194 <<
" evaluates to an imaginary number: sqrt("<<
x2(ivar)/sumOfWeights - mean*mean
195 <<
") .. sometimes related to a problem with outliers and negative event weights" 198 vars[ivar].SetRMS(
TMath::Sqrt(
x2(ivar)/sumOfWeights - mean*mean) );
200 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
201 Double_t mean = x0(nvars+itgt)/sumOfWeights;
202 tars[itgt].SetMean( mean );
203 if (
x2(nvars+itgt)/sumOfWeights - mean*mean < 0) {
204 Log() <<
kFATAL <<
" the RMS of your target variable " << itgt
205 <<
" evaluates to an imaginary number: sqrt(" <<
x2(nvars+itgt)/sumOfWeights - mean*mean
206 <<
") .. sometimes related to a problem with outliers and negative event weights" 209 tars[itgt].SetRMS(
TMath::Sqrt(
x2(nvars+itgt)/sumOfWeights - mean*mean) );
213 for (
UInt_t ievt=0; ievt<nevts; ievt++) {
214 const Event* ev = events[ievt];
217 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
219 Double_t mean = vars[ivar].GetMean();
220 v0(ivar) += weight*(x-mean)*(x-mean);
223 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
225 Double_t mean = tars[itgt].GetMean();
226 v0(nvars+itgt) += weight*(x-mean)*(x-mean);
232 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
233 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Variables";
234 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
235 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
238 Log() << std::setprecision(5);
239 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
240 Double_t variance = v0(ivar)/sumOfWeights;
241 vars[ivar].SetVariance( variance );
242 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << vars[ivar].GetExpression();
243 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
248 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
249 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Targets";
250 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
251 Log() <<
kINFO <<
"----------------------------------------------------------------" <<
Endl;
253 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
254 Double_t variance = v0(nvars+itgt)/sumOfWeights;
255 tars[itgt].SetVariance( variance );
256 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << tars[itgt].GetExpression();
257 Log() <<
kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
260 Log() <<
kINFO <<
"Set minNorm/maxNorm for variables to: " <<
Endl;
261 Log() << std::setprecision(3);
262 for (
UInt_t ivar=0; ivar<nvars; ivar++)
263 Log() <<
" " << vars[ivar].GetExpression()
264 <<
"\t: [" << vars[ivar].GetMin() <<
"\t, " << vars[ivar].GetMax() <<
"\t] " <<
Endl;
265 Log() <<
kINFO <<
"Set minNorm/maxNorm for targets to: " <<
Endl;
266 Log() << std::setprecision(3);
267 for (
UInt_t itgt=0; itgt<ntgts; itgt++)
268 Log() <<
" " << tars[itgt].GetExpression()
269 <<
"\t: [" << tars[itgt].GetMin() <<
"\t, " << tars[itgt].GetMax() <<
"\t] " <<
Endl;
270 Log() << std::setprecision(5);
278 des->
AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
283 des->
AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
UInt_t GetNVariables() const
MsgLogger & Endl(MsgLogger &ml)
DataSetInfo & GetDataSetInfo()
Int_t GetTargetNameMaxLength() const
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
static const double x2[5]
std::vector< std::vector< double > > Data
Double_t GetWeight() const
Returns the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not...
std::vector< VariableInfo > & GetTargetInfos()
TFileCollection * GetDataSet(const char *ds, const char *server="")
GetDataSet wrapper.
Float_t GetTarget(UInt_t itgt) const
UInt_t GetNTargets() const
DataInputHandler & DataInput()
Float_t GetValue(UInt_t ivar) const
Returns the value of the i-th variable.
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
Int_t GetVariableNameMaxLength() const
const TString & GetSplitOptions() const
const TCut & GetCut(Int_t i) const
Double_t Sqrt(Double_t x)
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
DataSet * GetDataSet() const
Returns the data set.
std::vector< VariableInfo > & GetVariableInfos()