Logo ROOT   6.08/07
Reference Guide
MethodCFMlpANN.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodCFMlpANN *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Interface for Clermont-Ferrand artificial neural network. *
12  * The ANN code has been translated from FORTRAN77 (f2c); *
13  * see files: MethodCFMlpANN_f2c_mlpl3.cpp *
14  * MethodCFMlpANN_f2c_datacc.cpp *
15  * *
16  * -------------------------------------------------------------------- *
17  * Reference for the original FORTRAN version: *
18  * Authors : J. Proriol and contributions from ALEPH-Clermont-Fd *
19  * Team members. Contact : gaypas@afal11.cern.ch *
20  * *
21  * Copyright: Laboratoire Physique Corpusculaire *
22  * Universite de Blaise Pascal, IN2P3/CNRS *
23  * -------------------------------------------------------------------- *
24  * *
25  * Usage: options are given through Factory: *
26  * factory->BookMethod( "MethodCFMlpANN", OptionsString ); *
27  * *
28  * where: *
29  * TString OptionsString = "n_training_cycles:n_hidden_layers" *
30  * *
31  * default is: n_training_cycles = 5000, n_layers = 4 *
32  * note that the number of hidden layers in the NN is *
33  * *
34  * n_hidden_layers = n_layers - 2 *
35  * *
36  * since there is one input and one output layer. The number of *
37  * nodes (neurons) is predefined to be *
38  * *
39  * n_nodes[i] = nvars + 1 - i (where i=1..n_layers) *
40  * *
41  * with nvars being the number of variables used in the NN. *
42  * Hence, the default case is: n_neurons(layer 1 (input)) : nvars *
43  * n_neurons(layer 2 (hidden)): nvars-1 *
44  * n_neurons(layer 3 (hidden)): nvars-1 *
45  * n_neurons(layer 4 (out)) : 2 *
46  * *
47  * This artificial neural network usually needs a relatively large *
48  * number of cycles to converge (8000 and more). Overtraining can *
49  * be efficiently tested by comparing the signal and background *
50  * output of the NN for the events that were used for training and *
51  * an independent data sample (with equal properties). If the separation *
52  * performance is significantly better for the training sample, the *
53  * NN interprets statistical effects, and is hence overtrained. In *
54  * this case, the number of cycles should be reduced, or the size *
55  * of the training sample increased. *
56  * *
57  * Authors (alphabetical): *
58  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
59  * Xavier Prudent <prudent@lapp.in2p3.fr> - LAPP, France *
60  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
61  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
62  * *
63  * Copyright (c) 2005: *
64  * CERN, Switzerland *
65  * U. of Victoria, Canada *
66  * MPI-K Heidelberg, Germany *
67  * LAPP, Annecy, France *
68  * *
69  * Redistribution and use in source and binary forms, with or without *
70  * modification, are permitted according to the terms listed in LICENSE *
71  * (http://tmva.sourceforge.net/LICENSE) *
72  * *
73  **********************************************************************************/
74 
75 #ifndef ROOT_TMVA_MethodCFMlpANN
76 #define ROOT_TMVA_MethodCFMlpANN
77 
78 //////////////////////////////////////////////////////////////////////////
79 // //
80 // MethodCFMlpANN //
81 // //
82 // Interface for Clermont-Ferrand artificial neural network //
83 // //
84 //////////////////////////////////////////////////////////////////////////
85 
86 #include <iosfwd>
87 
88 #ifndef ROOT_TMVA_MethodBase
89 #include "TMVA/MethodBase.h"
90 #endif
91 #ifndef ROOT_TMVA_MethodCFMlpANN_Utils
92 #include "TMVA/MethodCFMlpANN_Utils.h"
93 #endif
94 #ifndef ROOT_TMatrixF
95 #include "TMatrixF.h"
96 #endif
97 
98 namespace TMVA {
99 
100  class MethodCFMlpANN : public MethodBase, MethodCFMlpANN_Utils {
101 
102  public:
103 
104  MethodCFMlpANN( const TString& jobName,
105  const TString& methodTitle,
106  DataSetInfo& theData,
107  const TString& theOption = "3000:N-1:N-2");
108 
109  MethodCFMlpANN( DataSetInfo& theData,
110  const TString& theWeightFile);
111 
112  virtual ~MethodCFMlpANN( void );
113 
114  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ );
115 
116  // training method
117  void Train( void );
118 
118 
119  using MethodBase::ReadWeightsFromStream;
120 
121  // write weights to file
122  void AddWeightsXMLTo( void* parent ) const;
123 
124  // read weights from file
125  void ReadWeightsFromStream( std::istream& istr );
126  void ReadWeightsFromXML( void* wghtnode );
127  // calculate the MVA value
128  Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
129 
130  // data accessors for external functions
131  Double_t GetData ( Int_t isel, Int_t ivar ) const { return (*fData)(isel, ivar); }
132  Int_t GetClass( Int_t ivar ) const { return (*fClass)[ivar]; }
133 
134 
135  // ranking of input variables
136  const Ranking* CreateRanking() { return 0; }
137 
138  protected:
139 
140  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
141  void MakeClassSpecific( std::ostream&, const TString& ) const;
142 
143  // header and auxiliary classes
144  void MakeClassSpecificHeader( std::ostream&, const TString& = "" ) const;
145 
146  // get help message text
147  void GetHelpMessage() const;
148 
149  Int_t DataInterface( Double_t*, Double_t*, Int_t*, Int_t*, Int_t*, Int_t*,
150  Double_t*, Int_t*, Int_t* );
151 
152  private:
153 
154  void PrintWeights( std::ostream & o ) const;
155 
156  // the option handling methods
157  void DeclareOptions();
158  void ProcessOptions();
159 
160  // LUTs
161  TMatrixF *fData; // the (data,var) string
162  std::vector<Int_t> *fClass; // the event class (1=signal, 2=background)
163 
164  Int_t fNlayers; // number of layers (including input and output layers)
165  Int_t fNcycles; // number of training cycles
166  Int_t* fNodes; // number of nodes per layer
167 
168  // additional member variables for the independent NN::Evaluation phase
169  Double_t** fYNN; // weights
170  TString fLayerSpec; // the hidden layer specification string
172 
173  // auxiliary member functions
174  Double_t EvalANN( std::vector<Double_t>&, Bool_t& isOK );
175  void NN_ava ( Double_t* );
176  Double_t NN_fonc( Int_t, Double_t ) const;
177 
178  // default initialisation
179  void Init( void );
180 
181  ClassDef(MethodCFMlpANN,0); // Interface for Clermond-Ferrand artificial neural network
182  };
183 
184 } // namespace TMVA
185 
186 #endif
Double_t GetData(Int_t isel, Int_t ivar) const
void Train(void)
training of the Clermont-Ferrand NN classifier
void DeclareOptions()
define the options (their key words) that can be set in the option string known options: NCycles=xx :t...
void NN_ava(Double_t *)
auxiliary functions
void MakeClassSpecificHeader(std::ostream &, const TString &="") const
write specific classifier response for header
Int_t GetClass(Int_t ivar) const
void ReadWeightsFromXML(void *wghtnode)
read weights from xml file
void MakeClassSpecific(std::ostream &, const TString &) const
EAnalysisType
Definition: Types.h:129
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
Int_t DataInterface(Double_t *, Double_t *, Int_t *, Int_t *, Int_t *, Int_t *, Double_t *, Int_t *, Int_t *)
data interface function
TMatrixT.
Definition: TMatrixDfwd.h:24
virtual ~MethodCFMlpANN(void)
destructor
#define ClassDef(name, id)
Definition: Rtypes.h:254
std::vector< Int_t > * fClass
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
CFMlpANN can handle classification with 2 classes.
const Ranking * CreateRanking()
void PrintWeights(std::ostream &o) const
write the weights of the neural net
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns CFMlpANN output (normalised within [0,1])
Double_t NN_fonc(Int_t, Double_t) const
activation function
unsigned int UInt_t
Definition: RtypesCore.h:42
void GetHelpMessage() const
get help message text
Double_t EvalANN(std::vector< Double_t > &, Bool_t &isOK)
evaluates NN value as function of input variables
void ReadWeightsFromStream(std::istream &istr)
read back the weight from the training from file (stream)
double Double_t
Definition: RtypesCore.h:55
int type
Definition: TGX11.cxx:120
void AddWeightsXMLTo(void *parent) const
write weights to xml file
void Init(void)
default initialisation called by all constructors
void ProcessOptions()
decode the options in the option string
Abstract ClassifierFactory template that handles arbitrary types.
virtual void ReadWeightsFromStream(std::istream &)=0
MethodCFMlpANN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="3000:N-1:N-2")
standard constructor option string: "n_training_cycles:n_hidden_layers" default is: n_training_cycles...