169 <<
" kNN = \n" <<
fnkNN 174 <<
" Trim = \n" <<
fTrim 222 for (kNN::EventVec::const_iterator event =
fEvent.begin();
event !=
fEvent.end(); ++event) {
240 Log() <<
kINFO <<
"Input events are normalized - setting ScaleFrac to 0" <<
Endl;
249 Log() <<
kFATAL <<
"MethodKNN::Train() - mismatched or wrong number of event variables" <<
Endl;
279 kNN::Event event_knn(vvec, weight, event_type);
281 fEvent.push_back(event_knn);
312 for (
Int_t ivar = 0; ivar < nvar; ++ivar) {
323 if (rlist.size() != knn + 2) {
333 Bool_t use_gaus =
false, use_poln =
false;
335 if (
fKernel ==
"Gaus") use_gaus =
true;
336 else if (
fKernel ==
"Poln") use_poln =
true;
346 if (!(kradius > 0.0)) {
357 std::vector<Double_t> rms_vec;
361 if (rms_vec.empty() || rms_vec.size() != event_knn.
GetNVar()) {
368 Double_t weight_all = 0, weight_sig = 0, weight_bac = 0;
370 for (kNN::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit) {
377 if (lit->second < 0.0) {
378 Log() <<
kFATAL <<
"A neighbor has negative distance to query event" <<
Endl;
380 else if (!(lit->second > 0.0)) {
381 Log() <<
kVERBOSE <<
"A neighbor has zero distance to query event" <<
Endl;
392 if (node.
GetEvent().GetType() == 1) {
396 else if (node.
GetEvent().GetType() == 2) {
401 Log() <<
kFATAL <<
"Unknown type for training event" <<
Endl;
407 if (count_all >= knn) {
413 if (!(count_all > 0)) {
414 Log() <<
kFATAL <<
"Size kNN result list is not positive" <<
Endl;
419 if (count_all < knn) {
420 Log() <<
kDEBUG <<
"count_all and kNN have different size: " << count_all <<
" < " << knn <<
Endl;
424 if (!(weight_all > 0.0)) {
425 Log() <<
kFATAL <<
"kNN result total weight is not positive" <<
Endl;
429 return weight_sig/weight_all;
451 std::vector<float> reg_vec;
455 for (
Int_t ivar = 0; ivar < nvar; ++ivar) {
466 if (rlist.size() != knn + 2) {
475 for (kNN::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit) {
482 if (reg_vec.empty()) {
486 for(
UInt_t ivar = 0; ivar < tvec.size(); ++ivar) {
487 if (
fUseWeight) reg_vec[ivar] += tvec[ivar]*weight;
488 else reg_vec[ivar] += tvec[ivar];
497 if (count_all == knn) {
503 if (!(weight_all > 0.0)) {
504 Log() <<
kFATAL <<
"Total weight sum is not positive: " << weight_all <<
Endl;
508 for (
UInt_t ivar = 0; ivar < reg_vec.size(); ++ivar) {
509 reg_vec[ivar] /= weight_all;
535 for (kNN::EventVec::const_iterator event =
fEvent.begin();
event !=
fEvent.end(); ++event) {
537 std::stringstream s(
"");
539 for (
UInt_t ivar = 0; ivar <
event->GetNVar(); ++ivar) {
540 if (ivar>0) s <<
" ";
541 s << std::scientific <<
event->GetVar(ivar);
544 for (
UInt_t itgt = 0; itgt <
event->GetNTgt(); ++itgt) {
545 s <<
" " << std::scientific <<
event->GetTgt(itgt);
558 UInt_t nvar = 0, ntgt = 0;
573 std::stringstream s(
gTools().GetContent(ch) );
575 for(
UInt_t ivar=0; ivar<nvar; ivar++)
578 for(
UInt_t itgt=0; itgt<ntgt; itgt++)
583 kNN::Event event_knn(vvec, evtWeight, evtType, tvec);
584 fEvent.push_back(event_knn);
596 Log() <<
kINFO <<
"Starting ReadWeightsFromStream(std::istream& is) function..." <<
Endl;
607 std::getline(is, line);
609 if (line.empty() || line.find(
"#") != std::string::npos) {
614 std::string::size_type pos=0;
615 while( (pos=line.find(
',',pos)) != std::string::npos ) { count++; pos++; }
620 if (count < 3 || nvar != count - 2) {
631 std::string::size_type prev = 0;
633 for (std::string::size_type ipos = 0; ipos < line.size(); ++ipos) {
634 if (line[ipos] !=
',' && ipos + 1 != line.size()) {
638 if (!(ipos > prev)) {
642 std::string vstring = line.substr(prev, ipos - prev);
643 if (ipos + 1 == line.size()) {
644 vstring = line.substr(prev, ipos - prev + 1);
647 if (vstring.empty()) {
654 else if (vcount == 1) {
655 type = std::atoi(vstring.c_str());
657 else if (vcount == 2) {
658 weight = std::atof(vstring.c_str());
660 else if (vcount - 3 < vvec.size()) {
661 vvec[vcount - 3] = std::atof(vstring.c_str());
685 Log() <<
kINFO <<
"Starting WriteWeightsToStream(TFile &rf) function..." <<
Endl;
695 tree->
Branch(
"event",
"TMVA::kNN::Event", &event);
698 for (kNN::EventVec::const_iterator it =
fEvent.begin(); it !=
fEvent.end(); ++it) {
700 size += tree->
Fill();
710 <<
" events to ROOT file" <<
Endl;
721 Log() <<
kINFO <<
"Starting ReadWeightsFromStream(TFile &rf) function..." <<
Endl;
741 for (
Int_t i = 0; i < nevent; ++i) {
750 <<
" events from ROOT file" <<
Endl;
763 fout <<
" // not implemented for class: \"" << className <<
"\"" << std::endl;
764 fout <<
"};" << std::endl;
778 Log() <<
"The k-nearest neighbor (k-NN) algorithm is a multi-dimensional classification" <<
Endl 779 <<
"and regression algorithm. Similarly to other TMVA algorithms, k-NN uses a set of" <<
Endl 780 <<
"training events for which a classification category/regression target is known. " <<
Endl 781 <<
"The k-NN method compares a test event to all training events using a distance " <<
Endl 782 <<
"function, which is an Euclidean distance in a space defined by the input variables. "<<
Endl 783 <<
"The k-NN method, as implemented in TMVA, uses a kd-tree algorithm to perform a" <<
Endl 784 <<
"quick search for the k events with shortest distance to the test event. The method" <<
Endl 785 <<
"returns a fraction of signal events among the k neighbors. It is recommended" <<
Endl 786 <<
"that a histogram which stores the k-NN decision variable is binned with k+1 bins" <<
Endl 787 <<
"between 0 and 1." <<
Endl;
790 Log() <<
gTools().
Color(
"bold") <<
"--- Performance tuning via configuration options: " 793 Log() <<
"The k-NN method estimates a density of signal and background events in a "<< Endl
794 <<
"neighborhood around the test event. The method assumes that the density of the " << Endl
795 <<
"signal and background events is uniform and constant within the neighborhood. " << Endl
796 <<
"k is an adjustable parameter and it determines an average size of the " << Endl
797 <<
"neighborhood. Small k values (less than 10) are sensitive to statistical " << Endl
798 <<
"fluctuations and large (greater than 100) values might not sufficiently capture " << Endl
799 <<
"local differences between events in the training set. The speed of the k-NN" << Endl
800 <<
"method also increases with larger values of k. " <<
Endl;
802 Log() <<
"The k-NN method assigns equal weight to all input variables. Different scales " << Endl
803 <<
"among the input variables is compensated using ScaleFrac parameter: the input " << Endl
804 <<
"variables are scaled so that the widths for central ScaleFrac*100% events are " << Endl
805 <<
"equal among all the input variables." <<
Endl;
808 Log() <<
gTools().
Color(
"bold") <<
"--- Additional configuration options: " 811 Log() <<
"The method inclues an option to use a Gaussian kernel to smooth out the k-NN" << Endl
812 <<
"response. The kernel re-weights events using a distance to the test event." <<
Endl;
822 if (!(avalue < 1.0)) {
826 const Double_t prod = 1.0 - avalue * avalue * avalue;
828 return (prod * prod * prod);
835 const kNN::Event &event,
const std::vector<Double_t> &svec)
const 837 if (event_knn.
GetNVar() !=
event.GetNVar() || event_knn.
GetNVar() != svec.size()) {
838 Log() <<
kFATAL <<
"Mismatched vectors in Gaussian kernel function" <<
Endl;
845 double sum_exp = 0.0;
847 for(
unsigned int ivar = 0; ivar < event_knn.
GetNVar(); ++ivar) {
849 const Double_t diff_ =
event.GetVar(ivar) - event_knn.
GetVar(ivar);
851 if (!(sigm_ > 0.0)) {
856 sum_exp += diff_*diff_/(2.0*sigm_*sigm_);
878 for (kNN::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit)
880 if (!(lit->second > 0.0))
continue;
882 if (kradius < lit->second || kradius < 0.0) kradius = lit->second;
885 if (kcount >= knn)
break;
898 std::vector<Double_t> rvec;
902 for (kNN::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit)
904 if (!(lit->second > 0.0))
continue;
910 rvec.insert(rvec.end(), event_.
GetNVar(), 0.0);
912 else if (rvec.size() != event_.
GetNVar()) {
913 Log() <<
kFATAL <<
"Wrong number of variables, should never happen!" <<
Endl;
918 for(
unsigned int ivar = 0; ivar < event_.
GetNVar(); ++ivar) {
920 rvec[ivar] += diff_*diff_;
924 if (kcount >= knn)
break;
933 for(
unsigned int ivar = 0; ivar < rvec.size(); ++ivar) {
934 if (!(rvec[ivar] > 0.0)) {
952 for (kNN::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit) {
958 if (node.
GetEvent().GetType() == 1) {
959 sig_vec.push_back(tvec);
961 else if (node.
GetEvent().GetType() == 2) {
962 bac_vec.push_back(tvec);
965 Log() <<
kFATAL <<
"Unknown type for training event" <<
Endl;
void ProcessOptions()
process the options specified by the user
MsgLogger & Endl(MsgLogger &ml)
#define REGISTER_METHOD(CLASS)
for example
void DeclareOptions()
MethodKNN options.
Bool_t IgnoreEventsWithNegWeightsInTraining() const
virtual Int_t Fill()
Fill all branches.
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
void Train(void)
kNN training
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
void MakeKNN(void)
create kNN
virtual Int_t GetEntry(Long64_t entry=0, Int_t getall=0)
Read all branches of entry and return total number of bytes read.
Bool_t IsNormalised() const
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
virtual Int_t WriteTObject(const TObject *obj, const char *name=0, Option_t *option="", Int_t bufsize=0)
Write object obj to this directory.
void ReadWeightsFromStream(std::istream &istr)
read the weights
void Init(void)
Initialization.
virtual Int_t SetBranchAddress(const char *bname, void *add, TBranch **ptr=0)
Change branch address, dealing with clone trees properly.
Double_t PolnKernel(Double_t value) const
polynomial kernel
Float_t GetProb(const std::vector< Float_t > &x, Int_t k)
Signal probability with Gaussian approximation.
void WriteWeightsToStream(TFile &rf) const
save weights to ROOT file
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
LDA fLDA
(untouched) events used for learning
void SetTargets(const VarVec &tvec)
Bool_t Fill(const UShort_t odepth, UInt_t ifrac, const std::string &option="")
fill the tree
const VarVec & GetVars() const
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
std::vector< Float_t > & GetTargets()
UInt_t GetNVariables() const
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Compute classifier response.
Double_t GausKernel(const kNN::Event &event_knn, const kNN::Event &event, const std::vector< Double_t > &svec) const
Gaussian kernel.
virtual ~MethodKNN(void)
destructor
Int_t fTreeOptDepth
Experimental feature for local knn analysis.
const Event * GetEvent() const
void Initialize(const LDAEvents &inputSignal, const LDAEvents &inputBackground)
Create LDA matrix using local events found by knn method.
void Add(const Event &event)
add an event to tree
const Ranking * CreateRanking()
no ranking available
Bool_t Find(Event event, UInt_t nfind=100, const std::string &option="count") const
find in tree if tree has been filled then search for nfind closest events if metric (fVarScale map) is...
const List & GetkNNList() const
void ReadWeightsFromXML(void *wghtnode)
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
Double_t GetWeight() const
const T & GetEvent() const
void AddWeightsXMLTo(void *parent) const
write weights to XML
DataSetInfo & DataInfo() const
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
kNN can handle classification with 2 classes and regression with one regression-target.
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Abstract ClassifierFactory template that handles arbitrary types.
VarType GetVar(UInt_t i) const
void GetHelpMessage() const
get help message text
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
const std::vector< Double_t > getRMS(const kNN::List &rlist, const kNN::Event &event_knn) const
Get per-variable RMS of the nearest neighbors relative to the query event.
std::vector< std::vector< Float_t > > LDAEvents
std::vector< Float_t > * fRegressionReturnVal
virtual Long64_t GetEntries() const
A TTree object has a header with a name and a title.
Double_t Sqrt(Double_t x)
const std::vector< Float_t > & GetRegressionValues()
Return vector of averages for target values of k-nearest neighbors.
std::vector< VarType > VarVec
MethodKNN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="KNN")
standard constructor
double getLDAValue(const kNN::List &rlist, const kNN::Event &event_knn)
Int_t fnkNN
module where all work is done
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Double_t getKernelRadius(const kNN::List &rlist) const
Get polynomial kernel radius.