Logo ROOT   6.07/09
Reference Guide
Tools.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : Tools *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Global auxiliary applications and data treatment routines *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * U. of Victoria, Canada *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_Tools
30 #define ROOT_TMVA_Tools
31 
32 //////////////////////////////////////////////////////////////////////////
33 // //
34 // Tools (namespace) //
35 // //
36 // Global auxiliary applications and data treatment routines //
37 // //
38 //////////////////////////////////////////////////////////////////////////
39 
40 #include <vector>
41 #include <sstream>
42 #include <iostream>
43 #include <iomanip>
44 #if __cplusplus > 199711L
45 #include <atomic>
46 #endif
47 
48 #ifndef ROOT_TXMLEngine
49 #include "TXMLEngine.h"
50 #endif
51 
52 #ifndef ROOT_TMatrixDSymfwd
53 #include "TMatrixDSymfwd.h"
54 #endif
55 
56 #ifndef ROOT_TMatrixDfwd
57 #include "TMatrixDfwd.h"
58 #endif
59 
60 #ifndef ROOT_TVectorDfwd
61 #include "TVectorDfwd.h"
62 #endif
63 
64 #ifndef ROOT_TVectorDfwd
65 #include "TVectorDfwd.h"
66 #endif
67 
68 #ifndef ROOT_TMVA_Types
69 #include "TMVA/Types.h"
70 #endif
71 
72 #ifndef ROOT_TMVA_VariableTransformBase
74 #endif
75 
76 #include "TString.h"
77 
78 class TList;
79 class TTree;
80 class TH1;
81 class TH2;
82 class TH2F;
83 class TSpline;
84 class TXMLEngine;
85 
86 namespace TMVA {
87 
88  class Event;
89  class PDF;
90  class MsgLogger;
91 
92  class Tools {
93 
94  private:
95 
96  Tools();
97 
98  public:
99 
100  // destructor
101  ~Tools();
102 
103  // accessor to single instance
104  static Tools& Instance();
105  static void DestroyInstance();
106 
107 
108  template <typename T> Double_t Mean(Long64_t n, const T *a, const Double_t *w=0);
109  template <typename Iterator, typename WeightIterator> Double_t Mean ( Iterator first, Iterator last, WeightIterator w);
110 
111  template <typename T> Double_t RMS(Long64_t n, const T *a, const Double_t *w=0);
112  template <typename Iterator, typename WeightIterator> Double_t RMS(Iterator first, Iterator last, WeightIterator w);
113 
114 
115  // simple statistics operations on tree entries
116  void ComputeStat( const std::vector<TMVA::Event*>&,
117  std::vector<Float_t>*,
119  Double_t&, Double_t&, Double_t&, Int_t signalClass,
120  Bool_t norm = kFALSE );
121 
122  // compute variance from sums
123  inline Double_t ComputeVariance( Double_t sumx2, Double_t sumx, Int_t nx );
124 
125  // creates histograms normalized to one
126  TH1* projNormTH1F( TTree* theTree, const TString& theVarName,
127  const TString& name, Int_t nbins,
128  Double_t xmin, Double_t xmax, const TString& cut );
129 
130  // normalize histogram by its integral
131  Double_t NormHist( TH1* theHist, Double_t norm = 1.0 );
132 
133  // parser for TString phrase with items separated by a character
134  TList* ParseFormatLine( TString theString, const char * sep = ":" );
135 
136  // parse option string for ANN methods
137  std::vector<Int_t>* ParseANNOptionString( TString theOptions, Int_t nvar,
138  std::vector<Int_t>* nodes );
139 
140  // returns the square-root of a symmetric matrix: symMat = sqrtMat*sqrtMat
142 
143  // returns the covariance matrix of the different classes (and the sum)
144  // given the event sample
145  std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<Event*>& events, Int_t maxCls, VariableTransformBase* transformBase=0 );
146  std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<const Event*>& events, Int_t maxCls, VariableTransformBase* transformBase=0 );
147 
148 
149  // turns covariance into correlation matrix
150  const TMatrixD* GetCorrelationMatrix( const TMatrixD* covMat );
151 
152  // check spline quality by comparison with initial histogram
153  Bool_t CheckSplines( const TH1*, const TSpline* );
154 
155  // normalization of variable output
157 
158  // return separation of two histograms
159  Double_t GetSeparation( TH1* S, TH1* B ) const;
160  Double_t GetSeparation( const PDF& pdfS, const PDF& pdfB ) const;
161 
162  // vector rescaling
163  std::vector<Double_t> MVADiff( std::vector<Double_t>&, std::vector<Double_t>& );
164  void Scale( std::vector<Double_t>&, Double_t );
165  void Scale( std::vector<Float_t>&, Float_t );
166 
167  // re-arrange a vector of arrays (vectors) in a way such that the first array
168  // is ordered, and the other arrays reshuffled accordingly
169  void UsefulSortDescending( std::vector< std::vector<Double_t> >&, std::vector<TString>* vs = 0 );
170  void UsefulSortAscending ( std::vector< std::vector<Double_t> >&, std::vector<TString>* vs = 0 );
171 
172  void UsefulSortDescending( std::vector<Double_t>& );
173  void UsefulSortAscending ( std::vector<Double_t>& );
174 
175  Int_t GetIndexMaxElement ( std::vector<Double_t>& );
176  Int_t GetIndexMinElement ( std::vector<Double_t>& );
177 
178  // check if input string contains regular expression
180  TString ReplaceRegularExpressions( const TString& s, const TString& replace = "+" );
181 
182  // routines for formatted output -----------------
183  void FormattedOutput( const std::vector<Double_t>&, const std::vector<TString>&,
184  const TString titleVars, const TString titleValues, MsgLogger& logger,
185  TString format = "%+1.3f" );
186  void FormattedOutput( const TMatrixD&, const std::vector<TString>&, MsgLogger& logger );
187  void FormattedOutput( const TMatrixD&, const std::vector<TString>& vert, const std::vector<TString>& horiz,
188  MsgLogger& logger );
189 
190  void WriteFloatArbitraryPrecision( Float_t val, std::ostream& os );
191  void ReadFloatArbitraryPrecision ( Float_t& val, std::istream& is );
192 
193  // for histogramming
194  TString GetXTitleWithUnit( const TString& title, const TString& unit );
195  TString GetYTitleWithUnit( const TH1& h, const TString& unit, Bool_t normalised );
196 
197  // Mutual Information method for non-linear correlations estimates in 2D histogram
198  // Author: Moritz Backes, Geneva (2009)
200 
201  // Correlation Ratio method for non-linear correlations estimates in 2D histogram
202  // Author: Moritz Backes, Geneva (2009)
204  TH2F* TransposeHist ( const TH2F& );
205 
206  // check if "silent" or "verbose" option in configuration string
207  Bool_t CheckForSilentOption ( const TString& ) const;
208  Bool_t CheckForVerboseOption( const TString& ) const;
209 
210  // color information
211  const TString& Color( const TString& );
212 
213  // print welcome message (to be called from, eg, .TMVAlogon)
223 
224  // print TMVA citation (to be called from, eg, .TMVAlogon)
225  enum ECitation { kPlainText = 1,
229 
230  void TMVAWelcomeMessage();
232  void TMVAVersionMessage( MsgLogger& logger );
233  void ROOTVersionMessage( MsgLogger& logger );
234 
235  void TMVACitation( MsgLogger& logger, ECitation citType = kPlainText );
236 
237  // string tools
238 
239  std::vector<TString> SplitString( const TString& theOpt, const char separator ) const;
240 
241  // variables
243  mutable MsgLogger* fLogger;
244  MsgLogger& Log() const { return *fLogger; }
245 #if __cplusplus > 199711L
246  static std::atomic<Tools*> fgTools;
247 #else
248  static Tools* fgTools;
249 #endif
250 
251  // xml tools
252 
255  void WriteTMatrixDToXML ( void* node, const char* name, TMatrixD* mat );
256  void WriteTVectorDToXML ( void* node, const char* name, TVectorD* vec );
257  void ReadTMatrixDFromXML( void* node, const char* name, TMatrixD* mat );
258  void ReadTVectorDFromXML( void* node, const char* name, TVectorD* vec );
260 
261  Bool_t HasAttr ( void* node, const char* attrname );
262  template<typename T>
263  inline void ReadAttr ( void* node, const char* , T& value );
264  void ReadAttr ( void* node, const char* attrname, TString& value );
265  template<typename T>
266  void AddAttr ( void* node, const char* , const T& value, Int_t precision = 16 );
267  void AddAttr ( void* node, const char* attrname, const char* value );
268  void* AddChild ( void* parent, const char* childname, const char* content = 0, bool isRootNode = false );
269  Bool_t AddRawLine ( void* node, const char * raw );
270  Bool_t AddComment ( void* node, const char* comment );
271 
272  void* GetParent( void* child);
273  void* GetChild ( void* parent, const char* childname=0 );
274  void* GetNextChild( void* prevchild, const char* childname=0 );
275  const char* GetContent ( void* node );
276  const char* GetName ( void* node );
277 
279  int xmlenginebuffersize() { return 10000000; }
281 
282  TH1* GetCumulativeDist( TH1* h);
283 
284  private:
285 
286  // utilities for correlation ratio
287  Double_t GetYMean_binX( const TH2& , Int_t bin_x );
288 
289  }; // Common tools
290 
291  Tools& gTools(); // global accessor
292 
293 } // namespace TMVA
294 
295 //_______________________________________________________________________
296 template<typename T> void TMVA::Tools::ReadAttr( void* node, const char* attrname, T& value )
297 {
298  // read attribute from xml
299  TString val;
300  ReadAttr( node, attrname, val );
301  std::stringstream s(val.Data());
302  // coverity[tainted_data_argument]
303  s >> value;
304 }
305 
306 
307 //_______________________________________________________________________
308 template<typename T>
309 void TMVA::Tools::AddAttr( void* node, const char* attrname, const T& value, Int_t precision )
310 {
311  // add attribute to xml
312  std::stringstream s;
313  s.precision( precision );
314  s << std::scientific << value;
315  AddAttr( node, attrname, s.str().c_str() );
316 }
317 
318 //_______________________________________________________________________
320 {
321  // compute variance from given sums
322  if (nx<2) return 0;
323  return (sumx2 - ((sumx*sumx)/static_cast<Double_t>(nx)))/static_cast<Double_t>(nx-1);
324 }
325 
326 
327 
328 #endif
329 
const int nx
Definition: kalman.C:16
Bool_t ContainsRegularExpression(const TString &s)
check if regular expression helper function to search for "$!%^&()'<>?= " in a string ...
Definition: Tools.cxx:793
void Scale(std::vector< Double_t > &, Double_t)
scales double vector
Definition: Tools.cxx:528
static double B[]
static Tools & Instance()
Definition: Tools.cxx:80
TXMLEngine & xmlengine()
Definition: Tools.h:278
void UsefulSortDescending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:573
float xmin
Definition: THbookFile.cxx:93
Double_t RMS(Long64_t n, const T *a, const Double_t *w=0)
Definition: Tools.cxx:1746
long long Long64_t
Definition: RtypesCore.h:69
float Float_t
Definition: RtypesCore.h:53
MsgLogger & Log() const
Definition: Tools.h:244
static const std::string comment("comment")
double T(double x)
Definition: ChebyshevPol.h:34
void ROOTVersionMessage(MsgLogger &logger)
prints the ROOT release number and date
Definition: Tools.cxx:1333
std::vector< TMatrixDSym * > * CalcCovarianceMatrices(const std::vector< Event * > &events, Int_t maxCls, VariableTransformBase *transformBase=0)
compute covariance matrices
Definition: Tools.cxx:1522
TH1 * h
Definition: legend2.C:5
Base class for spline implementation containing the Draw/Paint methods.
Definition: TSpline.h:22
Bool_t CheckForSilentOption(const TString &) const
check for "silence" option in configuration option string
Definition: Tools.cxx:700
Basic string class.
Definition: TString.h:137
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition: Tools.cxx:598
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
void WriteFloatArbitraryPrecision(Float_t val, std::ostream &os)
writes a float value with the available precision to a stream
Definition: Tools.cxx:1067
TArc * a
Definition: textangle.C:12
const Bool_t kFALSE
Definition: Rtypes.h:92
int nbins[3]
static std::string format(double x, double y, int digits, int width)
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:309
Bool_t AddComment(void *node, const char *comment)
Definition: Tools.cxx:1142
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
void ReadTMatrixDFromXML(void *node, const char *name, TMatrixD *mat)
Definition: Tools.cxx:1284
const char * Data() const
Definition: TString.h:349
void * GetParent(void *child)
get parent node
Definition: Tools.cxx:1149
Tools & gTools()
Definition: Tools.cxx:79
Bool_t CheckForVerboseOption(const TString &) const
check if verbosity "V" set in option
Definition: Tools.cxx:717
std::vector< Double_t > MVADiff(std::vector< Double_t > &, std::vector< Double_t > &)
computes difference between two vectors
Definition: Tools.cxx:515
Double_t x[n]
Definition: legend1.C:17
~Tools()
destructor
Definition: Tools.cxx:118
static const std::string separator("@@@")
int xmlenginebuffersize()
Definition: Tools.h:279
Bool_t CheckSplines(const TH1 *, const TSpline *)
Definition: Tools.cxx:487
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1231
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:395
std::vector< Int_t > * ParseANNOptionString(TString theOptions, Int_t nvar, std::vector< Int_t > *nodes)
parse option string for ANN methods default settings (should be defined in theOption string) ...
Definition: Tools.cxx:440
Definition: PDF.h:71
Bool_t AddRawLine(void *node, const char *raw)
XML helpers.
Definition: Tools.cxx:1198
A doubly linked list.
Definition: TList.h:47
static Tools * fgTools
Definition: Tools.h:248
Double_t Mean(Long64_t n, const T *a, const Double_t *w=0)
Definition: Tools.cxx:1695
static void DestroyInstance()
Definition: Tools.cxx:95
RooArgSet S(const RooAbsArg &v1)
MsgLogger * fLogger
Definition: Tools.h:243
EWelcomeMessage
Definition: Tools.h:214
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1241
Service class for 2-Dim histogram classes.
Definition: TH2.h:36
TString ReplaceRegularExpressions(const TString &s, const TString &replace="+")
replace regular expressions helper function to remove all occurrences "$!%^&()'<>?= " from a string an...
Definition: Tools.cxx:807
void TMVAWelcomeMessage()
direct output, eg, when starting ROOT session -> no use of Logger here
Definition: Tools.cxx:1310
TXMLEngine * fXMLEngine
Definition: Tools.h:280
Int_t GetIndexMaxElement(std::vector< Double_t > &)
find index of maximum entry in vector
Definition: Tools.cxx:757
2-D histogram with a float per channel (see TH1 documentation)
Definition: TH2.h:255
Bool_t HistoHasEquidistantBins(const TH1 &h)
Definition: Tools.cxx:1495
TMarker * m
Definition: textangle.C:8
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
Definition: Tools.cxx:136
TString GetYTitleWithUnit(const TH1 &h, const TString &unit, Bool_t normalised)
histogramming utility
Definition: Tools.cxx:1057
Tools()
constructor
Definition: Tools.cxx:108
Bool_t HasAttr(void *node, const char *attrname)
add attribute from xml
Definition: Tools.cxx:1104
const char * GetContent(void *node)
XML helpers.
Definition: Tools.cxx:1182
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:296
float xmax
Definition: THbookFile.cxx:93
Double_t ComputeVariance(Double_t sumx2, Double_t sumx, Int_t nx)
Definition: Tools.h:319
TH1 * GetCumulativeDist(TH1 *h)
Definition: Tools.cxx:1760
TString GetXTitleWithUnit(const TString &title, const TString &unit)
histogramming utility
Definition: Tools.cxx:1049
long Long_t
Definition: RtypesCore.h:50
const TString fRegexp
Definition: Tools.h:242
TH1 * projNormTH1F(TTree *theTree, const TString &theVarName, const TString &name, Int_t nbins, Double_t xmin, Double_t xmax, const TString &cut)
projects variable from tree into normalised histogram
Definition: Tools.cxx:378
double Double_t
Definition: RtypesCore.h:55
const TMatrixD * GetCorrelationMatrix(const TMatrixD *covMat)
turns covariance into correlation matrix
Definition: Tools.cxx:337
void WriteTVectorDToXML(void *node, const char *name, TVectorD *vec)
Definition: Tools.cxx:1267
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
The TH1 histogram class.
Definition: TH1.h:80
void UsefulSortAscending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:547
void ReadFloatArbitraryPrecision(Float_t &val, std::istream &is)
reads a float value with the available precision from a stream
Definition: Tools.cxx:1082
void TMVAVersionMessage(MsgLogger &logger)
prints the TMVA release number and date
Definition: Tools.cxx:1324
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:896
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
void WriteTMatrixDToXML(void *node, const char *name, TMatrixD *mat)
XML helpers.
Definition: Tools.cxx:1251
Abstract ClassifierFactory template that handles arbitrary types.
Double_t GetYMean_binX(const TH2 &, Int_t bin_x)
Compute the mean in Y for a given bin X of a 2D histogram.
Definition: Tools.cxx:651
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
const char * GetName(void *node)
XML helpers.
Definition: Tools.cxx:1190
A TTree object has a header with a name and a title.
Definition: TTree.h:98
Double_t NormVariable(Double_t x, Double_t xmin, Double_t xmax)
normalise to output range: [-1, 1]
Definition: Tools.cxx:127
void TMVACitation(MsgLogger &logger, ECitation citType=kPlainText)
kinds of TMVA citation
Definition: Tools.cxx:1449
TMatrixD * GetSQRootMatrix(TMatrixDSym *symMat)
square-root of symmetric matrix; of course the resulting sqrtMat is also symmetric, but it's easier to treat it as a general matrix
Definition: Tools.cxx:284
Definition: first.py:1
void ReadTVectorDFromXML(void *node, const char *name, TVectorD *vec)
Definition: Tools.cxx:1275
TH2F * TransposeHist(const TH2F &)
Transpose quadratic histogram.
Definition: Tools.cxx:666
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:215
const Int_t n
Definition: legend1.C:16
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings ...
Definition: Tools.cxx:1207
Int_t GetIndexMinElement(std::vector< Double_t > &)
find index of minimum entry in vector
Definition: Tools.cxx:774
Double_t GetCorrelationRatio(const TH2F &)
Compute Correlation Ratio of 2D histogram to estimate functional dependency between two variables Aut...
Definition: Tools.cxx:629
char name[80]
Definition: TGX11.cxx:109