// Excerpt from the option summary printout (fnkNN = neighbor count, fTrim = trim flag)
   << " kNN = \n"  << fnkNN
   // ...
   << " Trim = \n" << fTrim
fTrim 222 for (kNN::EventVec::const_iterator event =
fEvent.begin();
event !=
fEvent.end(); ++event) {

// MethodKNN::Train()
Log() << kINFO << "Input events are normalized - setting ScaleFrac to 0" << Endl;
// ...
Log() << kFATAL << "MethodKNN::Train() - mismatched or wrong number of event variables" << Endl;
// ...
kNN::Event event_knn(vvec, weight, event_type);
fEvent.push_back(event_knn);

// MethodKNN::GetMvaValue() - compute the classifier response
for (Int_t ivar = 0; ivar < nvar; ++ivar) {
// ...
if (rlist.size() != knn + 2) {
// ...
Bool_t use_gaus = false, use_poln = false;
if      (fKernel == "Gaus") use_gaus = true;
else if (fKernel == "Poln") use_poln = true;
// ...
if (!(kradius > 0.0)) {
// ...
std::vector<Double_t> rms_vec;
// ...
if (rms_vec.empty() || rms_vec.size() != event_knn.GetNVar()) {
// ...
Double_t weight_all = 0, weight_sig = 0, weight_bac = 0;

for (kNN::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit) {
   // ...
   if (lit->second < 0.0) {
      Log() << kFATAL << "A neighbor has negative distance to query event" << Endl;
   }
   else if (!(lit->second > 0.0)) {
      Log() << kVERBOSE << "A neighbor has zero distance to query event" << Endl;
   }
   // ...
   if (node.GetEvent().GetType() == 1) {        // signal training event
      // ...
   }
   else if (node.GetEvent().GetType() == 2) {   // background training event
      // ...
   }
   else {
      Log() << kFATAL << "Unknown type for training event" << Endl;
   }
   // ...
   if (count_all >= knn) {
      // ...
   }
}

if (!(count_all > 0)) {
   Log() << kFATAL << "Size kNN result list is not positive" << Endl;
   // ...
}

if (count_all < knn) {
   Log() << kDEBUG << "count_all and kNN have different size: " << count_all << " < " << knn << Endl;
}

if (!(weight_all > 0.0)) {
   Log() << kFATAL << "kNN result total weight is not positive" << Endl;
   // ...
}

return weight_sig/weight_all;
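
The returned classifier value is the weighted signal fraction among the k nearest neighbors; restating the return statement above with w_i the neighbor weights:

   \[ x_{\mathrm{kNN}} = \frac{\sum_{i \in \mathrm{sig}} w_i}{\sum_{i \in \mathrm{all}} w_i} \]

When event weights are not used (cf. the fUseWeight branch in the regression code below), this reduces to a plain count fraction.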

// MethodKNN::GetRegressionValues() - average the targets of the k nearest neighbors
std::vector<float> reg_vec;
// ...
for (Int_t ivar = 0; ivar < nvar; ++ivar) {
// ...
if (rlist.size() != knn + 2) {
// ...
for (kNN::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit) {
   // ...
   if (reg_vec.empty()) {
      // ...
   }
   for (UInt_t ivar = 0; ivar < tvec.size(); ++ivar) {
      if (fUseWeight) reg_vec[ivar] += tvec[ivar]*weight;
      else            reg_vec[ivar] += tvec[ivar];
   }
   // ...
   if (count_all == knn) {
      // ...
   }
}

if (!(weight_all > 0.0)) {
   Log() << kFATAL << "Total weight sum is not positive: " << weight_all << Endl;
   // ...
}

for (UInt_t ivar = 0; ivar < reg_vec.size(); ++ivar) {
   reg_vec[ivar] /= weight_all;
}
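
For regression, each target component is thus the weighted mean of that target over the k nearest neighbors:

   \[ t_j = \frac{\sum_{i=1}^{k} w_i \, t_{ij}}{\sum_{i=1}^{k} w_i} \]

where t_{ij} is target j of neighbor i; with fUseWeight disabled, w_i = 1 for all neighbors.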

// MethodKNN::AddWeightsXMLTo() - serialize each training event as text
for (kNN::EventVec::const_iterator event = fEvent.begin(); event != fEvent.end(); ++event) {
   std::stringstream s("");
   // ...
   for (UInt_t ivar = 0; ivar < event->GetNVar(); ++ivar) {
      if (ivar > 0) s << " ";
      s << std::scientific << event->GetVar(ivar);
   }

   for (UInt_t itgt = 0; itgt < event->GetNTgt(); ++itgt) {
      s << " " << std::scientific << event->GetTgt(itgt);
   }
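
Each event is therefore stored as one whitespace-separated string of scientific-notation values, input variables first and regression targets appended. A hypothetical 3-variable, 1-target event would serialize as something like (illustrative values only):

   1.250000e+00 -3.400000e-01 2.100000e+00 5.000000e-01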

// MethodKNN::ReadWeightsFromXML() - restore the stored events
UInt_t nvar = 0, ntgt = 0;
// ...
std::stringstream s( gTools().GetContent(ch) );

// read the variables and targets back from the text payload
for (UInt_t ivar = 0; ivar < nvar; ivar++)
   s >> vvec[ivar];

for (UInt_t itgt = 0; itgt < ntgt; itgt++)
   s >> tvec[itgt];

kNN::Event event_knn(vvec, evtWeight, evtType, tvec);
fEvent.push_back(event_knn);

// MethodKNN::ReadWeightsFromStream(std::istream&) - parse the text weight file
Log() << kINFO << "Starting ReadWeightsFromStream(std::istream& is) function..." << Endl;
// ...
std::getline(is, line);

// skip empty lines and comment lines
if (line.empty() || line.find("#") != std::string::npos) {
// ...
// count the comma-separated fields on this line
std::string::size_type pos = 0;
while ((pos = line.find(',', pos)) != std::string::npos) { count++; pos++; }

if (count < 3 || nvar != count - 2) {
// ...
std::string::size_type prev = 0;

for (std::string::size_type ipos = 0; ipos < line.size(); ++ipos) {
   if (line[ipos] != ',' && ipos + 1 != line.size()) {
      continue;
   }
   if (!(ipos > prev)) {
      // ...
   }
   std::string vstring = line.substr(prev, ipos - prev);
   if (ipos + 1 == line.size()) {
      vstring = line.substr(prev, ipos - prev + 1);
   }
   if (vstring.empty()) {
      // ...
   }
   // ... (handling of field 0 is not part of this excerpt)
   else if (vcount == 1) {
      type = std::atoi(vstring.c_str());
   }
   else if (vcount == 2) {
      weight = std::atof(vstring.c_str());
   }
   else if (vcount - 3 < vvec.size()) {
      vvec[vcount - 3] = std::atof(vstring.c_str());
   }
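
Judging from this parsing logic, each non-comment line of the text weight file encodes one training event as comma-separated fields: field 1 is the event type, field 2 the event weight, and fields 3 onward the input variables (field 0's meaning is not shown in this excerpt). A hypothetical line for a 3-variable signal event could look like:

   ..., 1, 1.0, 0.25, -1.3, 4.7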

// MethodKNN::WriteWeightsToStream(TFile&) - save events into a TTree
Log() << kINFO << "Starting WriteWeightsToStream(TFile &rf) function..." << Endl;
// ...
tree->Branch("event", "TMVA::kNN::Event", &event);
// ...
for (kNN::EventVec::const_iterator it = fEvent.begin(); it != fEvent.end(); ++it) {
   // ...
   size += tree->Fill();
}
// ...
      << " events to ROOT file" << Endl;

// MethodKNN::ReadWeightsFromStream(TFile&) - read events back from a TTree
Log() << kINFO << "Starting ReadWeightsFromStream(TFile &rf) function..." << Endl;
// ...
for (Int_t i = 0; i < nevent; ++i) {
// ...
      << " events from ROOT file" << Endl;

// MethodKNN::MakeClassSpecific() - a standalone C++ response is not provided
fout << " // not implemented for class: \"" << className << "\"" << std::endl;
fout << "};" << std::endl;

// MethodKNN::GetHelpMessage()
Log() << "The k-nearest neighbor (k-NN) algorithm is a multi-dimensional classification" << Endl
      << "and regression algorithm. Similarly to other TMVA algorithms, k-NN uses a set of" << Endl
      << "training events for which a classification category/regression target is known." << Endl
      << "The k-NN method compares a test event to all training events using a distance" << Endl
      << "function, which is an Euclidean distance in a space defined by the input variables." << Endl
      << "The k-NN method, as implemented in TMVA, uses a kd-tree algorithm to perform a" << Endl
      << "quick search for the k events with shortest distance to the test event. The method" << Endl
      << "returns a fraction of signal events among the k neighbors. It is recommended" << Endl
      << "that a histogram which stores the k-NN decision variable is binned with k+1 bins" << Endl
      << "between 0 and 1." << Endl;

Log() << gTools().Color("bold") << "--- Performance tuning via configuration options: "
      << gTools().Color("reset") << Endl;

Log() << "The k-NN method estimates a density of signal and background events in a" << Endl
      << "neighborhood around the test event. The method assumes that the density of the" << Endl
      << "signal and background events is uniform and constant within the neighborhood." << Endl
      << "k is an adjustable parameter and it determines an average size of the" << Endl
      << "neighborhood. Small k values (less than 10) are sensitive to statistical" << Endl
      << "fluctuations and large (greater than 100) values might not sufficiently capture" << Endl
      << "local differences between events in the training set. The speed of the k-NN" << Endl
      << "method also increases with larger values of k." << Endl;

Log() << "The k-NN method assigns equal weight to all input variables. Different scales" << Endl
      << "among the input variables are compensated for using the ScaleFrac parameter: the" << Endl
      << "input variables are scaled so that the widths of the central ScaleFrac*100% of" << Endl
      << "events are equal among all the input variables." << Endl;

Log() << gTools().Color("bold") << "--- Additional configuration options: "
      << gTools().Color("reset") << Endl;

Log() << "The method includes an option to use a Gaussian kernel to smooth out the k-NN" << Endl
      << "response. The kernel re-weights events using the distance to the test event." << Endl;
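
For context, these parameters are set through the usual TMVA booking string. A minimal sketch follows, using the option names this method declares (nkNN, ScaleFrac, Kernel, UseKernel, UseWeight, Trim) with illustrative values; `factory` and `dataloader` stand for an already-configured TMVA::Factory and TMVA::DataLoader:

// Sketch: booking the k-NN method (option values are illustrative, not recommendations).
factory->BookMethod(dataloader, TMVA::Types::kKNN, "KNN",
                    "nkNN=20:ScaleFrac=0.8:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim");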

// MethodKNN::PolnKernel() - polynomial kernel weight (avalue is the absolute kernel argument)
if (!(avalue < 1.0)) {
   return 0.0;
}
const Double_t prod = 1.0 - avalue * avalue * avalue;
return (prod * prod * prod);
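
As a worked form of the lines above, the polynomial kernel weight for a scaled distance a = |value| is:

   \[ w(a) = \begin{cases} (1 - a^3)^3 & a < 1 \\ 0 & \text{otherwise} \end{cases} \]

so the weight falls smoothly from 1 at zero distance to 0 at the kernel radius.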

// MethodKNN::GausKernel() - Gaussian kernel weight for one neighbor
Double_t TMVA::MethodKNN::GausKernel(const kNN::Event &event_knn,
                                     const kNN::Event &event,
                                     const std::vector<Double_t> &svec) const
{
   if (event_knn.GetNVar() != event.GetNVar() || event_knn.GetNVar() != svec.size()) {
      Log() << kFATAL << "Mismatched vectors in Gaussian kernel function" << Endl;
      // ...
   }

   // accumulate the Gaussian exponent from per-variable distances
   double sum_exp = 0.0;

   for (unsigned int ivar = 0; ivar < event_knn.GetNVar(); ++ivar) {
      const Double_t diff_ = event.GetVar(ivar) - event_knn.GetVar(ivar);
      // ... (sigm_, the per-variable width, presumably comes from svec)
      if (!(sigm_ > 0.0)) {
         // ...
      }
      sum_exp += diff_*diff_/(2.0*sigm_*sigm_);
   }
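
The loop accumulates the exponent of a multivariate Gaussian; the kernel weight it implies (the final exponentiation is not part of this excerpt) is:

   \[ k(x, y) = \exp\!\left( -\sum_{i} \frac{(x_i - y_i)^2}{2\sigma_i^2} \right) \]

with x the neighbor, y the query event, and sigma_i the per-variable width carried in svec.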

// MethodKNN::getKernelRadius() - largest neighbor distance among the first knn entries
for (kNN::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit) {
   if (!(lit->second > 0.0)) continue;

   if (kradius < lit->second || kradius < 0.0) kradius = lit->second;
   // ...
   if (kcount >= knn) break;
}

// MethodKNN::getRMS() - accumulate per-variable squared deviations over the neighbors
std::vector<Double_t> rvec;
// ...
for (kNN::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit) {
   if (!(lit->second > 0.0)) continue;
   // ...
   if (rvec.empty()) {
      rvec.insert(rvec.end(), event_.GetNVar(), 0.0);
   }
   else if (rvec.size() != event_.GetNVar()) {
      Log() << kFATAL << "Wrong number of variables, should never happen!" << Endl;
      // ...
   }

   for (unsigned int ivar = 0; ivar < event_.GetNVar(); ++ivar) {
      // ...
      rvec[ivar] += diff_*diff_;
   }
   // ...
   if (kcount >= knn) break;
}
// ...
for (unsigned int ivar = 0; ivar < rvec.size(); ++ivar) {
   if (!(rvec[ivar] > 0.0)) {
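
After the loop, rvec[ivar] holds a sum of squared differences between the query event and its n used neighbors; the per-variable RMS it implies (the normalization step is outside this excerpt) is:

   \[ \sigma_i = \sqrt{ \frac{1}{n} \sum_{k=1}^{n} (x_{ki} - y_i)^2 } \]

These widths appear to be what GetMvaValue() hands to the Gaussian kernel via rms_vec/svec.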

// MethodKNN::getLDAValue() - split neighbor variable vectors by training type
for (kNN::List::const_iterator lit = rlist.begin(); lit != rlist.end(); ++lit) {
   // ...
   if (node.GetEvent().GetType() == 1) {        // signal training event
      sig_vec.push_back(tvec);
   }
   else if (node.GetEvent().GetType() == 2) {   // background training event
      bac_vec.push_back(tvec);
   }
   else {
      Log() << kFATAL << "Unknown type for training event" << Endl;
   }
}
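
The two lists then feed the local LDA helper fLDA. Based on that helper's interface (Initialize(inputSignal, inputBackground) to build the LDA matrix and GetProb(x, k) for the signal probability), the end of the function plausibly reads as follows; this is a sketch, as the exact lines are outside this excerpt:

// Sketch (assumed): build the local LDA from the neighbor lists and
// return the signal probability for the query point.
fLDA.Initialize(sig_vec, bac_vec);
return fLDA.GetProb(event_knn.GetVars(), 1);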