47 #ifndef ROOT_TMVA_MsgLogger
56 TMVA::VariableTransformBase::VariableTransformBase( DataSetInfo& dsi,
57 Types::EVariableTransform tf,
63 fBackTransformedEvent(0),
64 fVariableTransform(tf),
68 fTransformName(trfName),
69 fVariableTypesAreCounted(
false),
77 fLogger =
new MsgLogger(
this,
kINFO);
78 for (
UInt_t ivar = 0; ivar < fDsi.GetNVariables(); ivar++) {
79 fVariables.push_back( VariableInfo( fDsi.GetVariableInfo(ivar) ) );
81 for (
UInt_t itgt = 0; itgt < fDsi.GetNTargets(); itgt++) {
82 fTargets.push_back( VariableInfo( fDsi.GetTargetInfo(itgt) ) );
84 for (
UInt_t ispct = 0; ispct < fDsi.GetNSpectators(); ispct++) {
85 fTargets.push_back( VariableInfo( fDsi.GetSpectatorInfo(ispct) ) );
104 TString inputVariables = _inputVariables;
109 UInt_t nvars = GetNVariables();
110 UInt_t ntgts = GetNTargets();
111 UInt_t nspcts = GetNSpectators();
113 typedef std::set<Int_t> SelectedIndices;
115 SelectedIndices varIndices;
116 SelectedIndices tgtIndices;
117 SelectedIndices spctIndices;
119 if (inputVariables ==
"")
121 inputVariables =
"_V_,_T_";
136 if( variables.
Length() == 0 ){
137 for(
UInt_t ivar = 0; ivar < nvars; ++ivar ) {
138 fGet.push_back( std::pair<Char_t,UInt_t>(
'v',ivar) );
139 varIndices.insert( ivar );
144 Log() <<
kFATAL <<
"You selected variable with index : " << idx <<
" of only " << nvars <<
" variables." <<
Endl;
145 fGet.push_back( std::pair<Char_t,UInt_t>(
'v',idx) );
146 varIndices.insert( idx );
150 if( variables.
Length() == 0 ){
151 for(
UInt_t itgt = 0; itgt < ntgts; ++itgt ) {
152 fGet.push_back( std::pair<Char_t,UInt_t>(
't',itgt) );
153 tgtIndices.insert( itgt );
158 Log() <<
kFATAL <<
"You selected target with index : " << idx <<
" of only " << ntgts <<
" targets." <<
Endl;
159 fGet.push_back( std::pair<Char_t,UInt_t>(
't',idx) );
160 tgtIndices.insert( idx );
164 if( variables.
Length() == 0 ){
165 for(
UInt_t ispct = 0; ispct < nspcts; ++ispct ) {
166 fGet.push_back( std::pair<Char_t,UInt_t>(
's',ispct) );
167 spctIndices.insert( ispct );
172 Log() <<
kFATAL <<
"You selected spectator with index : " << idx <<
" of only " << nspcts <<
" spectators." <<
Endl;
173 fGet.push_back( std::pair<Char_t,UInt_t>(
's',idx) );
174 spctIndices.insert( idx );
177 ToggleInputSortOrder(
kFALSE );
179 Log() <<
kINFO <<
"Variable rearrangement set true: Variable order given in transformation option is used for input to transformation!" <<
Endl;
183 Int_t numIndices = varIndices.size()+tgtIndices.size()+spctIndices.size();
184 for(
UInt_t ivar = 0; ivar < nvars; ++ivar ) {
185 if( fDsi.GetVariableInfo( ivar ).GetLabel() ==
variables ) {
186 fGet.push_back( std::pair<Char_t,UInt_t>(
'v',ivar) );
187 varIndices.insert( ivar );
191 for(
UInt_t itgt = 0; itgt < ntgts; ++itgt ) {
192 if( fDsi.GetTargetInfo( itgt ).GetLabel() ==
variables ) {
193 fGet.push_back( std::pair<Char_t,UInt_t>(
't',itgt) );
194 tgtIndices.insert( itgt );
198 for(
UInt_t ispct = 0; ispct < nspcts; ++ispct ) {
199 if( fDsi.GetSpectatorInfo( ispct ).GetLabel() ==
variables ) {
200 fGet.push_back( std::pair<Char_t,UInt_t>(
's',ispct) );
201 spctIndices.insert( ispct );
205 Int_t numIndicesEndOfLoop = varIndices.size()+tgtIndices.size()+spctIndices.size();
206 if( numIndicesEndOfLoop == numIndices )
207 Log() <<
kWARNING <<
"Error at parsing the options for the variable transformations: Variable/Target/Spectator '" << variables.
Data() <<
"' not found." <<
Endl;
208 numIndices = numIndicesEndOfLoop;
213 if( putIntoVariables ) {
215 for( SelectedIndices::iterator it = varIndices.begin(), itEnd = varIndices.end(); it != itEnd; ++it ) {
216 fPut.push_back( std::pair<Char_t,UInt_t>(
'v',idx) );
219 for( SelectedIndices::iterator it = tgtIndices.begin(), itEnd = tgtIndices.end(); it != itEnd; ++it ) {
220 fPut.push_back( std::pair<Char_t,UInt_t>(
't',idx) );
223 for( SelectedIndices::iterator it = spctIndices.begin(), itEnd = spctIndices.end(); it != itEnd; ++it ) {
224 fPut.push_back( std::pair<Char_t,UInt_t>(
's',idx) );
228 for( SelectedIndices::iterator it = varIndices.begin(), itEnd = varIndices.end(); it != itEnd; ++it ) {
230 fPut.push_back( std::pair<Char_t,UInt_t>(
'v',idx) );
232 for( SelectedIndices::iterator it = tgtIndices.begin(), itEnd = tgtIndices.end(); it != itEnd; ++it ) {
234 fPut.push_back( std::pair<Char_t,UInt_t>(
't',idx) );
236 for( SelectedIndices::iterator it = spctIndices.begin(), itEnd = spctIndices.end(); it != itEnd; ++it ) {
238 fPut.push_back( std::pair<Char_t,UInt_t>(
's',idx) );
244 fGet.assign( fPut.begin(), fPut.end() );
249 Log() <<
kINFO <<
"Transformation, Variable selection : " <<
Endl;
252 const DataSetInfo* outputDsiPtr = (fDsiOutput? &(*fDsiOutput) : &fDsi );
256 ItVarTypeIdx itGet = fGet.begin(), itGetEnd = fGet.end();
258 for( ; itGet != itGetEnd; ++itGet ) {
261 Char_t inputType = (*itGet).first;
262 Int_t inputIdx = (*itGet).second;
264 TString inputLabel =
"NOT FOND";
265 if( inputType ==
'v' ) {
266 inputLabel = fDsi.GetVariableInfo( inputIdx ).GetLabel();
267 inputTypeString =
"variable";
269 else if( inputType ==
't' ){
270 inputLabel = fDsi.GetTargetInfo( inputIdx ).GetLabel();
271 inputTypeString =
"target";
273 else if( inputType ==
's' ){
274 inputLabel = fDsi.GetSpectatorInfo( inputIdx ).GetLabel();
275 inputTypeString =
"spectator";
278 TString outputTypeString =
"?";
280 Char_t outputType = (*itPut).first;
281 Int_t outputIdx = (*itPut).second;
283 TString outputLabel =
"NOT FOUND";
284 if( outputType ==
'v' ) {
286 outputTypeString =
"variable";
288 else if( outputType ==
't' ){
290 outputTypeString =
"target";
292 else if( outputType ==
's' ){
294 outputTypeString =
"spectator";
298 Log() <<
kINFO <<
"Input : " << inputTypeString.
Data() <<
" '" << inputLabel.
Data() <<
"' (index=" << inputIdx <<
"). <---> "
299 <<
"Output : " << outputTypeString.
Data() <<
" '" << outputLabel.
Data() <<
"' (index=" << outputIdx <<
")." <<
Endl;
317 if( backTransformation && !fPut.empty() ){
318 itEntry = fPut.begin();
319 itEntryEnd = fPut.end();
320 input.reserve(fPut.size());
323 itEntry = fGet.begin();
324 itEntryEnd = fGet.end();
325 input.reserve(fGet.size() );
330 for( ; itEntry != itEntryEnd; ++itEntry ) {
332 Int_t idx = (*itEntry).second;
337 input.push_back( event->
GetValue(idx) );
340 input.push_back( event->
GetTarget(idx) );
346 Log() <<
kFATAL <<
"VariableTransformBase/GetInput : unknown type '" << type <<
"'." <<
Endl;
350 catch(std::out_of_range& ){
351 input.push_back(0.
f);
352 mask.push_back(
kTRUE);
353 hasMaskedEntries =
kTRUE;
356 return hasMaskedEntries;
364 std::vector<Float_t>::iterator itOutput = output.begin();
365 std::vector<Char_t>::iterator itMask = mask.begin();
368 event->CopyVarValues( *oldEvent );
375 if( backTransformation || fPut.empty() ){
376 itEntry = fGet.begin();
377 itEntryEnd = fGet.end();
380 itEntry = fPut.begin();
381 itEntryEnd = fPut.end();
385 for( ; itEntry != itEntryEnd; ++itEntry ) {
392 Int_t idx = (*itEntry).second;
393 if (itOutput == output.end())
Log() <<
kFATAL <<
"Read beyond array boundaries in VariableTransformBase::SetOutput"<<
Endl;
398 event->SetVal( idx, value );
401 event->SetTarget( idx, value );
404 event->SetSpectator( idx, value );
407 Log() <<
kFATAL <<
"VariableTransformBase/GetInput : unknown type '" << type <<
"'." <<
Endl;
409 if( !(*itMask) ) ++itOutput;
413 }
catch( std::exception&
except ){
414 Log() <<
kFATAL <<
"VariableTransformBase/SetOutput : exception/" << except.what() <<
Endl;
425 if( fVariableTypesAreCounted ){
428 nspcts = fNSpectators;
432 nvars = ntgts = nspcts = 0;
434 for(
ItVarTypeIdxConst itEntry = fGet.begin(), itEntryEnd = fGet.end(); itEntry != itEntryEnd; ++itEntry ) {
448 Log() <<
kFATAL <<
"VariableTransformBase/GetVariableTypeNumbers : unknown type '" << type <<
"'." <<
Endl;
454 fNSpectators = nspcts;
456 fVariableTypesAreCounted =
true;
467 if (!IsCreated())
return;
469 const UInt_t nvars = GetNVariables();
470 const UInt_t ntgts = GetNTargets();
472 UInt_t nevts = events.size();
475 TVectorD x0( nvars+ntgts ); x0 *= 0;
478 for (
UInt_t ievt=0; ievt<nevts; ievt++) {
479 const Event* ev = events[ievt];
482 sumOfWeights += weight;
483 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
486 Variables().at(ivar).SetMin(x);
487 Variables().at(ivar).SetMax(x);
490 UpdateNorm( ivar, x );
492 x0(ivar) += x*weight;
493 x2(ivar) += x*x*weight;
495 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
498 Targets().at(itgt).SetMin(x);
499 Targets().at(itgt).SetMax(x);
502 UpdateNorm( nvars+itgt, x );
504 x0(nvars+itgt) += x*weight;
505 x2(nvars+itgt) += x*x*weight;
509 if (sumOfWeights <= 0) {
510 Log() <<
kFATAL <<
" the sum of event weights calcualted for your input is == 0"
511 <<
" or exactly: " << sumOfWeights <<
" there is obviously some problem..."<<
Endl;
515 for (
UInt_t ivar=0; ivar<nvars; ivar++) {
516 Double_t mean = x0(ivar)/sumOfWeights;
518 Variables().at(ivar).SetMean( mean );
519 if (
x2(ivar)/sumOfWeights - mean*mean < 0) {
520 Log() <<
kFATAL <<
" the RMS of your input variable " << ivar
521 <<
" evaluates to an imaginary number: sqrt("<<
x2(ivar)/sumOfWeights - mean*mean
522 <<
") .. sometimes related to a problem with outliers and negative event weights"
525 Variables().at(ivar).SetRMS(
TMath::Sqrt(
x2(ivar)/sumOfWeights - mean*mean) );
527 for (
UInt_t itgt=0; itgt<ntgts; itgt++) {
528 Double_t mean = x0(nvars+itgt)/sumOfWeights;
529 Targets().at(itgt).SetMean( mean );
530 if (
x2(nvars+itgt)/sumOfWeights - mean*mean < 0) {
531 Log() <<
kFATAL <<
" the RMS of your target variable " << itgt
532 <<
" evaluates to an imaginary number: sqrt(" <<
x2(nvars+itgt)/sumOfWeights - mean*mean
533 <<
") .. sometimes related to a problem with outliers and negative event weights"
536 Targets().at(itgt).SetRMS(
TMath::Sqrt(
x2(nvars+itgt)/sumOfWeights - mean*mean) );
540 Log() << std::setprecision(3);
541 for (
UInt_t ivar=0; ivar<GetNVariables(); ivar++)
542 Log() <<
" " << Variables().at(ivar).GetInternalName()
543 <<
"\t: [" << Variables().at(ivar).GetMin() <<
"\t, " << Variables().at(ivar).GetMax() <<
"\t] " <<
Endl;
545 Log() << std::setprecision(3);
546 for (
UInt_t itgt=0; itgt<GetNTargets(); itgt++)
547 Log() <<
" " << Targets().at(itgt).GetInternalName()
548 <<
"\t: [" << Targets().at(itgt).GetMin() <<
"\t, " << Targets().at(itgt).GetMax() <<
"\t] " <<
Endl;
549 Log() << std::setprecision(5);
559 std::vector<TString>* strVec =
new std::vector<TString>;
560 for (
UInt_t ivar=0; ivar<GetNVariables(); ivar++) {
561 strVec->push_back( Variables()[ivar].GetLabel() +
"_[transformed]");
573 Int_t nvars = fDsi.GetNVariables();
575 if (x < Variables().
at(ivar).GetMin()) Variables().at(ivar).SetMin(x);
576 if (x > Variables().
at(ivar).GetMax()) Variables().at(ivar).SetMax(x);
578 if (x < Targets().
at(ivar-nvars).GetMin()) Targets().at(ivar-nvars).SetMin(x);
579 if (x > Targets().
at(ivar-nvars).GetMax()) Targets().at(ivar-nvars).SetMax(x);
594 const DataSetInfo* outputDsiPtr = (fDsiOutput? fDsiOutput : &fDsi );
596 for(
ItVarTypeIdx itGet = fGet.begin(), itGetEnd = fGet.end(); itGet != itGetEnd; ++itGet ) {
597 UInt_t idx = (*itGet).second;
605 typeString =
"Variable";
606 label = fDsi.GetVariableInfo( idx ).GetLabel();
607 expression = fDsi.GetVariableInfo( idx ).GetExpression();
610 typeString =
"Target";
611 label = fDsi.GetTargetInfo( idx ).GetLabel();
612 expression = fDsi.GetTargetInfo( idx ).GetExpression();
615 typeString =
"Spectator";
616 label = fDsi.GetSpectatorInfo( idx ).GetLabel();
617 expression = fDsi.GetSpectatorInfo( idx ).GetExpression();
620 Log() <<
kFATAL <<
"VariableTransformBase/AttachXMLTo unknown variable type '" << type <<
"'." <<
Endl;
634 for(
ItVarTypeIdx itPut = fPut.begin(), itPutEnd = fPut.end(); itPut != itPutEnd; ++itPut ) {
635 UInt_t idx = (*itPut).second;
643 typeString =
"Variable";
648 typeString =
"Target";
653 typeString =
"Spectator";
658 Log() <<
kFATAL <<
"VariableTransformBase/AttachXMLTo unknown variable type '" << type <<
"'." <<
Endl;
679 UInt_t nvars = GetNVariables();
680 UInt_t ntgts = GetNTargets();
681 UInt_t nspcts = GetNSpectators();
701 if( typeString ==
"Variable" ){
702 for(
UInt_t ivar = 0; ivar < nvars; ++ivar ) {
703 if( fDsi.GetVariableInfo( ivar ).GetLabel() == label ||
704 fDsi.GetVariableInfo( ivar ).GetExpression() == expression) {
705 fGet.push_back( std::pair<Char_t,UInt_t>(
'v',ivar) );
709 }
else if( typeString ==
"Target" ){
710 for(
UInt_t itgt = 0; itgt < ntgts; ++itgt ) {
711 if( fDsi.GetTargetInfo( itgt ).GetLabel() == label ||
712 fDsi.GetTargetInfo( itgt ).GetExpression() == expression ) {
713 fGet.push_back( std::pair<Char_t,UInt_t>(
't',itgt) );
717 }
else if( typeString ==
"Spectator" ){
718 for(
UInt_t ispct = 0; ispct < nspcts; ++ispct ) {
719 if( fDsi.GetSpectatorInfo( ispct ).GetLabel() == label ||
720 fDsi.GetSpectatorInfo( ispct ).GetExpression() == expression ) {
721 fGet.push_back( std::pair<Char_t,UInt_t>(
's',ispct) );
726 Log() <<
kFATAL <<
"VariableTransformationBase/ReadFromXML : unknown type '" << typeString <<
"'." <<
Endl;
731 assert( nInputs == fGet.size() );
749 if( typeString ==
"Variable" ){
750 for(
UInt_t ivar = 0; ivar < nvars; ++ivar ) {
751 if( fDsi.GetVariableInfo( ivar ).GetLabel() == label ||
752 fDsi.GetVariableInfo( ivar ).GetExpression() == expression ) {
753 fPut.push_back( std::pair<Char_t,UInt_t>(
'v',ivar) );
757 }
else if( typeString ==
"Target" ){
758 for(
UInt_t itgt = 0; itgt < ntgts; ++itgt ) {
759 if( fDsi.GetTargetInfo( itgt ).GetLabel() == label ||
760 fDsi.GetTargetInfo( itgt ).GetExpression() == expression ) {
761 fPut.push_back( std::pair<Char_t,UInt_t>(
't',itgt) );
765 }
else if( typeString ==
"Spectator" ){
766 for(
UInt_t ispct = 0; ispct < nspcts; ++ispct ) {
767 if( fDsi.GetSpectatorInfo( ispct ).GetLabel() == label ||
768 fDsi.GetSpectatorInfo( ispct ).GetExpression() == expression ) {
769 fPut.push_back( std::pair<Char_t,UInt_t>(
's',ispct) );
774 Log() <<
kFATAL <<
"VariableTransformationBase/ReadFromXML : unknown type '" << typeString <<
"'." <<
Endl;
779 assert( nOutputs == fPut.size() );
793 fout <<
" // define the indices of the variables which are transformed by this transformation" << std::endl;
794 fout <<
" static std::vector<int> indicesGet;" << std::endl;
795 fout <<
" static std::vector<int> indicesPut;" << std::endl << std::endl;
796 fout <<
" if ( indicesGet.empty() ) { " << std::endl;
797 fout <<
" indicesGet.reserve(fNvars);" << std::endl;
799 for(
ItVarTypeIdxConst itEntry = fGet.begin(), itEntryEnd = fGet.end(); itEntry != itEntryEnd; ++itEntry ) {
801 Int_t idx = (*itEntry).second;
805 fout <<
" indicesGet.push_back( " << idx <<
");" << std::endl;
808 Log() <<
kWARNING <<
"MakeClass doesn't work with transformation of targets. The results will be wrong!" <<
Endl;
811 Log() <<
kWARNING <<
"MakeClass doesn't work with transformation of spectators. The results will be wrong!" <<
Endl;
814 Log() <<
kFATAL <<
"VariableTransformBase/GetInput : unknown type '" << type <<
"'." <<
Endl;
817 fout <<
" } " << std::endl;
818 fout <<
" if ( indicesPut.empty() ) { " << std::endl;
819 fout <<
" indicesPut.reserve(fNvars);" << std::endl;
821 for(
ItVarTypeIdxConst itEntry = fPut.begin(), itEntryEnd = fPut.end(); itEntry != itEntryEnd; ++itEntry ) {
823 Int_t idx = (*itEntry).second;
827 fout <<
" indicesPut.push_back( " << idx <<
");" << std::endl;
830 Log() <<
kWARNING <<
"MakeClass doesn't work with transformation of targets. The results will be wrong!" <<
Endl;
833 Log() <<
kWARNING <<
"MakeClass doesn't work with transformation of spectators. The results will be wrong!" <<
Endl;
836 Log() <<
kFATAL <<
"VariableTransformBase/PutInput : unknown type '" << type <<
"'." <<
Endl;
840 fout <<
" } " << std::endl;
843 }
else if( part == 1){
#define TMVA_VERSION_CODE
MsgLogger & Endl(MsgLogger &ml)
void variables(TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
Collectable string class.
Float_t GetSpectator(UInt_t ivar) const
return spectator content
const TString & GetExpression() const
Double_t GetWeight() const
Return the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not.
ClassImp(TIterator) Bool_t TIterator return false
Compare two iterator objects.
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
const char * Data() const
static const double x2[5]
Int_t Atoi() const
Return integer value of string.
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
VariableInfo & GetTargetInfo(Int_t i)
TString & Remove(Ssiz_t pos)
VariableInfo & GetSpectatorInfo(Int_t i)
VariableInfo & GetVariableInfo(Int_t i)
Mother of all ROOT objects.
Float_t GetTarget(UInt_t itgt) const
Abstract ClassifierFactory template that handles arbitrary types.
Double_t Sqrt(Double_t x)
const TString & GetLabel() const
static void output(int code)