88 Log() << kINFO <<
"Number of variables before transformation: " << nvars <<
Endl;
89 std::vector<VariableInfo>& vars =
fDataSetInfo.GetVariableInfos();
96 Log() << kINFO <<
"Selecting variables whose variance is above threshold value = " << threshold <<
Endl;
99 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
100 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Selected Variables";
101 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
102 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
103 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
104 Double_t variance = vars[ivar].GetVariance();
105 if (variance > threshold)
107 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << vars[ivar].GetExpression();
108 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
109 transformedLoader->
AddVariable(vars[ivar].GetExpression(), vars[ivar].GetVarType());
113 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
119 return transformedLoader;
150 const std::vector<TMVA::Event*>& events =
fDataSetInfo.GetDataSet()->GetEventCollection();
154 std::vector<VariableInfo>& vars =
fDataSetInfo.GetVariableInfos();
155 std::vector<VariableInfo>& tars =
fDataSetInfo.GetTargetInfos();
157 UInt_t nevts = events.size();
159 TVectorD x2( nvars+ntgts ); x2 *= 0;
160 TVectorD x0( nvars+ntgts ); x0 *= 0;
164 for (
UInt_t ievt=0; ievt<nevts; ievt++) {
165 const Event* ev = events[ievt];
168 sumOfWeights += weight;
169 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
172 vars[ivar].SetMin(
x);
173 vars[ivar].SetMax(
x);
178 x0(ivar) +=
x*weight;
179 x2(ivar) +=
x*
x*weight;
181 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
184 tars[itgt].SetMin(
x);
185 tars[itgt].SetMax(
x);
190 x0(nvars+itgt) +=
x*weight;
191 x2(nvars+itgt) +=
x*
x*weight;
195 if (sumOfWeights <= 0) {
196 Log() << kFATAL <<
" the sum of event weights calculated for your input is == 0"
197 <<
" or exactly: " << sumOfWeights <<
" there is obviously some problem..."<<
Endl;
201 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
202 Double_t mean = x0(ivar)/sumOfWeights;
204 vars[ivar].SetMean( mean );
205 if (x2(ivar)/sumOfWeights - mean*mean < 0) {
206 Log() << kFATAL <<
" the RMS of your input variable " << ivar
207 <<
" evaluates to an imaginary number: sqrt("<< x2(ivar)/sumOfWeights - mean*mean
208 <<
") .. sometimes related to a problem with outliers and negative event weights"
211 vars[ivar].SetRMS(
TMath::Sqrt( x2(ivar)/sumOfWeights - mean*mean) );
213 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
214 Double_t mean = x0(nvars+itgt)/sumOfWeights;
215 tars[itgt].SetMean( mean );
216 if (x2(nvars+itgt)/sumOfWeights - mean*mean < 0) {
217 Log() << kFATAL <<
" the RMS of your target variable " << itgt
218 <<
" evaluates to an imaginary number: sqrt(" << x2(nvars+itgt)/sumOfWeights - mean*mean
219 <<
") .. sometimes related to a problem with outliers and negative event weights"
222 tars[itgt].SetRMS(
TMath::Sqrt( x2(nvars+itgt)/sumOfWeights - mean*mean) );
226 for (
UInt_t ievt=0; ievt<nevts; ievt++) {
227 const Event* ev = events[ievt];
230 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
232 Double_t mean = vars[ivar].GetMean();
233 v0(ivar) += weight*(
x-mean)*(
x-mean);
236 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
238 Double_t mean = tars[itgt].GetMean();
239 v0(nvars+itgt) += weight*(
x-mean)*(
x-mean);
245 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
246 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Variables";
247 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
248 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
251 Log() << std::setprecision(5);
252 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
254 vars[ivar].SetVariance( variance );
255 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << vars[ivar].GetExpression();
256 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
261 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
262 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) <<
"Targets";
263 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(10) <<
"Variance" <<
Endl;
264 Log() << kINFO <<
"----------------------------------------------------------------" <<
Endl;
266 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
267 Double_t variance =
v0(nvars+itgt)/sumOfWeights;
268 tars[itgt].SetVariance( variance );
269 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << tars[itgt].GetExpression();
270 Log() << kINFO << std::setiosflags(std::ios::left) << std::setw(maxL) << variance <<
Endl;
273 Log() << kINFO <<
"Set minNorm/maxNorm for variables to: " <<
Endl;
274 Log() << std::setprecision(3);
275 for (
UInt_t ivar=0; ivar<nvars; ivar++)
276 Log() <<
" " << vars[ivar].GetExpression()
277 <<
"\t: [" << vars[ivar].GetMin() <<
"\t, " << vars[ivar].GetMax() <<
"\t] " <<
Endl;
278 Log() << kINFO <<
"Set minNorm/maxNorm for targets to: " <<
Endl;
279 Log() << std::setprecision(3);
280 for (
UInt_t itgt=0; itgt<ntgts; itgt++)
281 Log() <<
" " << tars[itgt].GetExpression()
282 <<
"\t: [" << tars[itgt].GetMin() <<
"\t, " << tars[itgt].GetMax() <<
"\t] " <<
Endl;
283 Log() << std::setprecision(5);
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
Inserts a user-specified discriminating variable into the data set info.