ROOT  6.06/09
Reference Guide
Tools.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : Tools *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Global auxiliary applications and data treatment routines *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * U. of Victoria, Canada *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_Tools
30 #define ROOT_TMVA_Tools
31 
32 //////////////////////////////////////////////////////////////////////////
33 // //
34 // Tools (namespace) //
35 // //
36 // Global auxiliary applications and data treatment routines //
37 // //
38 //////////////////////////////////////////////////////////////////////////
39 
40 #include <vector>
41 #include <sstream>
42 #include <iostream>
43 #include <iomanip>
44 #if __cplusplus > 199711L
45 #include <atomic>
46 #endif
47 
48 #ifndef ROOT_TXMLEngine
49 #include "TXMLEngine.h"
50 #endif
51 
52 #ifndef ROOT_TMatrixDSymfwd
53 #include "TMatrixDSymfwd.h"
54 #endif
55 
56 #ifndef ROOT_TMatrixDfwd
57 #include "TMatrixDfwd.h"
58 #endif
59 
60 #ifndef ROOT_TVectorDfwd
61 #include "TVectorDfwd.h"
62 #endif
63 
64 #ifndef ROOT_TVectorDfwd
65 #include "TVectorDfwd.h"
66 #endif
67 
68 #ifndef ROOT_TMVA_Types
69 #include "TMVA/Types.h"
70 #endif
71 
72 #ifndef ROOT_TMVA_VariableTransformBase
74 #endif
75 
76 class TList;
77 class TTree;
78 class TString;
79 class TH1;
80 class TH2;
81 class TH2F;
82 class TSpline;
83 class TXMLEngine;
84 
85 namespace TMVA {
86 
87  class Event;
88  class PDF;
89  class MsgLogger;
90 
91  class Tools {
92 
93  private:
94 
95  Tools();
96 
97  public:
98 
99  // destructor
100  ~Tools();
101 
102  // accessor to single instance
103  static Tools& Instance();
104  static void DestroyInstance();
105 
106 
107  template <typename T> Double_t Mean(Long64_t n, const T *a, const Double_t *w=0);
108  template <typename Iterator, typename WeightIterator> Double_t Mean ( Iterator first, Iterator last, WeightIterator w);
109 
110  template <typename T> Double_t RMS(Long64_t n, const T *a, const Double_t *w=0);
111  template <typename Iterator, typename WeightIterator> Double_t RMS(Iterator first, Iterator last, WeightIterator w);
112 
113 
114  // simple statistics operations on tree entries
115  void ComputeStat( const std::vector<TMVA::Event*>&,
116  std::vector<Float_t>*,
118  Double_t&, Double_t&, Double_t&, Int_t signalClass,
119  Bool_t norm = kFALSE );
120 
121  // compute variance from sums
122  inline Double_t ComputeVariance( Double_t sumx2, Double_t sumx, Int_t nx );
123 
124  // creates histograms normalized to one
125  TH1* projNormTH1F( TTree* theTree, const TString& theVarName,
126  const TString& name, Int_t nbins,
127  Double_t xmin, Double_t xmax, const TString& cut );
128 
129  // normalize histogram by its integral
130  Double_t NormHist( TH1* theHist, Double_t norm = 1.0 );
131 
132  // parser for TString phrase with items separated by a character
133  TList* ParseFormatLine( TString theString, const char * sep = ":" );
134 
135  // parse option string for ANN methods
136  std::vector<Int_t>* ParseANNOptionString( TString theOptions, Int_t nvar,
137  std::vector<Int_t>* nodes );
138 
139  // returns the square-root of a symmetric matrix: symMat = sqrtMat*sqrtMat
141 
142  // returns the covariance matrices of the different classes (and the sum)
143  // given the event sample
144  std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<Event*>& events, Int_t maxCls, VariableTransformBase* transformBase=0 );
145  std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<const Event*>& events, Int_t maxCls, VariableTransformBase* transformBase=0 );
146 
147 
148  // turns covariance into correlation matrix
149  const TMatrixD* GetCorrelationMatrix( const TMatrixD* covMat );
150 
151  // check spline quality by comparison with initial histogram
152  Bool_t CheckSplines( const TH1*, const TSpline* );
153 
154  // normalization of variable output
156 
157  // return separation of two histograms
158  Double_t GetSeparation( TH1* S, TH1* B ) const;
159  Double_t GetSeparation( const PDF& pdfS, const PDF& pdfB ) const;
160 
161  // vector rescaling
162  std::vector<Double_t> MVADiff( std::vector<Double_t>&, std::vector<Double_t>& );
163  void Scale( std::vector<Double_t>&, Double_t );
164  void Scale( std::vector<Float_t>&, Float_t );
165 
166  // re-arrange a vector of arrays (vectors) in a way such that the first array
167  // is ordered, and the other arrays are reshuffled accordingly
168  void UsefulSortDescending( std::vector< std::vector<Double_t> >&, std::vector<TString>* vs = 0 );
169  void UsefulSortAscending ( std::vector< std::vector<Double_t> >&, std::vector<TString>* vs = 0 );
170 
171  void UsefulSortDescending( std::vector<Double_t>& );
172  void UsefulSortAscending ( std::vector<Double_t>& );
173 
174  Int_t GetIndexMaxElement ( std::vector<Double_t>& );
175  Int_t GetIndexMinElement ( std::vector<Double_t>& );
176 
177  // check if input string contains regular expression
179  TString ReplaceRegularExpressions( const TString& s, const TString& replace = "+" );
180 
181  // routines for formatted output -----------------
182  void FormattedOutput( const std::vector<Double_t>&, const std::vector<TString>&,
183  const TString titleVars, const TString titleValues, MsgLogger& logger,
184  TString format = "%+1.3f" );
185  void FormattedOutput( const TMatrixD&, const std::vector<TString>&, MsgLogger& logger );
186  void FormattedOutput( const TMatrixD&, const std::vector<TString>& vert, const std::vector<TString>& horiz,
187  MsgLogger& logger );
188 
189  void WriteFloatArbitraryPrecision( Float_t val, std::ostream& os );
190  void ReadFloatArbitraryPrecision ( Float_t& val, std::istream& is );
191 
192  // for histogramming
193  TString GetXTitleWithUnit( const TString& title, const TString& unit );
194  TString GetYTitleWithUnit( const TH1& h, const TString& unit, Bool_t normalised );
195 
196  // Mutual Information method for non-linear correlations estimates in 2D histogram
197  // Author: Moritz Backes, Geneva (2009)
199 
200  // Correlation Ratio method for non-linear correlations estimates in 2D histogram
201  // Author: Moritz Backes, Geneva (2009)
203  TH2F* TransposeHist ( const TH2F& );
204 
205  // check if "silent" or "verbose" option in configuration string
206  Bool_t CheckForSilentOption ( const TString& ) const;
207  Bool_t CheckForVerboseOption( const TString& ) const;
208 
209  // color information
210  const TString& Color( const TString& );
211 
212  // print welcome message (to be called from, eg, .TMVAlogon)
222 
223  // print TMVA citation (to be called from, eg, .TMVAlogon)
224  enum ECitation { kPlainText = 1,
228 
229  void TMVAWelcomeMessage();
231  void TMVAVersionMessage( MsgLogger& logger );
232  void ROOTVersionMessage( MsgLogger& logger );
233 
234  void TMVACitation( MsgLogger& logger, ECitation citType = kPlainText );
235 
236  // string tools
237 
238  std::vector<TString> SplitString( const TString& theOpt, const char separator ) const;
239 
240  // variables
242  mutable MsgLogger* fLogger;
243  MsgLogger& Log() const { return *fLogger; }
244 #if __cplusplus > 199711L
245  static std::atomic<Tools*> fgTools;
246 #else
247  static Tools* fgTools;
248 #endif
249 
250  // xml tools
251 
254  void WriteTMatrixDToXML ( void* node, const char* name, TMatrixD* mat );
255  void WriteTVectorDToXML ( void* node, const char* name, TVectorD* vec );
256  void ReadTMatrixDFromXML( void* node, const char* name, TMatrixD* mat );
257  void ReadTVectorDFromXML( void* node, const char* name, TVectorD* vec );
259 
260  Bool_t HasAttr ( void* node, const char* attrname );
261  template<typename T>
262  inline void ReadAttr ( void* node, const char* , T& value );
263  void ReadAttr ( void* node, const char* attrname, TString& value );
264  template<typename T>
265  void AddAttr ( void* node, const char* , const T& value, Int_t precision = 16 );
266  void AddAttr ( void* node, const char* attrname, const char* value );
267  void* AddChild ( void* parent, const char* childname, const char* content = 0, bool isRootNode = false );
268  Bool_t AddRawLine ( void* node, const char * raw );
269  Bool_t AddComment ( void* node, const char* comment );
270 
271  void* GetParent( void* child);
272  void* GetChild ( void* parent, const char* childname=0 );
273  void* GetNextChild( void* prevchild, const char* childname=0 );
274  const char* GetContent ( void* node );
275  const char* GetName ( void* node );
276 
278  int xmlenginebuffersize() { return 10000000; }
280 
281  TH1* GetCumulativeDist( TH1* h);
282 
283  private:
284 
285  // utilities for correlation ratio
286  Double_t GetYMean_binX( const TH2& , Int_t bin_x );
287 
288  }; // Common tools
289 
290  Tools& gTools(); // global accessor
291 
292 } // namespace TMVA
293 
294 //_______________________________________________________________________
295 template<typename T> void TMVA::Tools::ReadAttr( void* node, const char* attrname, T& value )
296 {
297  // fetch the attribute as text through the TString overload, then stream-extract it into value
298  TString attrText;
299  ReadAttr( node, attrname, attrText );
300  std::stringstream extractor( attrText.Data() );
301  // coverity[tainted_data_argument]
302  extractor >> value;
303 }
304 
305 
306 //_______________________________________________________________________
307 template<typename T>
308 void TMVA::Tools::AddAttr( void* node, const char* attrname, const T& value, Int_t precision )
309 {
310  // format the value in scientific notation with the requested precision, then store it as a text attribute
311  std::stringstream text;
312  text << std::scientific << std::setprecision( precision );
313  text << value;
314  AddAttr( node, attrname, text.str().c_str() );
315 }
316 
317 //_______________________________________________________________________
318 inline Double_t TMVA::Tools::ComputeVariance( Double_t sumx2, Double_t sumx, Int_t nx )
319 {
320  // compute variance from given sums: sumx2 = sum of x^2, sumx = sum of x, nx = number of entries
321  if (nx<2) return 0; // fewer than two entries: the (nx-1) divisor below would be zero or negative
322  return (sumx2 - ((sumx*sumx)/static_cast<Double_t>(nx)))/static_cast<Double_t>(nx-1);
323 }
324 
325 
326 
327 #endif
328 
const int nx
Definition: kalman.C:16
Bool_t ContainsRegularExpression(const TString &s)
check if regular expression: helper function to search for "$!%^&()'<>?= " in a string ...
Definition: Tools.cxx:793
void Scale(std::vector< Double_t > &, Double_t)
scales double vector
Definition: Tools.cxx:528
static double B[]
static Tools & Instance()
Definition: Tools.cxx:80
TXMLEngine & xmlengine()
Definition: Tools.h:277
void UsefulSortDescending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:573
float xmin
Definition: THbookFile.cxx:93
Double_t RMS(Long64_t n, const T *a, const Double_t *w=0)
Definition: Tools.cxx:1745
long long Long64_t
Definition: RtypesCore.h:69
float Float_t
Definition: RtypesCore.h:53
MsgLogger & Log() const
Definition: Tools.h:243
static const std::string comment("comment")
double T(double x)
Definition: ChebyshevPol.h:34
void ROOTVersionMessage(MsgLogger &logger)
prints the ROOT release number and date
Definition: Tools.cxx:1333
std::vector< TMatrixDSym * > * CalcCovarianceMatrices(const std::vector< Event * > &events, Int_t maxCls, VariableTransformBase *transformBase=0)
compute covariance matrices
Definition: Tools.cxx:1521
TH1 * h
Definition: legend2.C:5
Base class for spline implementation containing the Draw/Paint methods.
Definition: TSpline.h:22
Bool_t CheckForSilentOption(const TString &) const
check for "silent" option in configuration option string
Definition: Tools.cxx:700
Basic string class.
Definition: TString.h:137
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition: Tools.cxx:598
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
void WriteFloatArbitraryPrecision(Float_t val, std::ostream &os)
writes a float value with the available precision to a stream
Definition: Tools.cxx:1067
TArc * a
Definition: textangle.C:12
const Bool_t kFALSE
Definition: Rtypes.h:92
int nbins[3]
static std::string format(double x, double y, int digits, int width)
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:308
Bool_t AddComment(void *node, const char *comment)
Definition: Tools.cxx:1142
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
void ReadTMatrixDFromXML(void *node, const char *name, TMatrixD *mat)
Definition: Tools.cxx:1284
const char * Data() const
Definition: TString.h:349
void * GetParent(void *child)
get parent node
Definition: Tools.cxx:1149
Tools & gTools()
Definition: Tools.cxx:79
Bool_t CheckForVerboseOption(const TString &) const
check if verbosity "V" set in option
Definition: Tools.cxx:717
std::vector< Double_t > MVADiff(std::vector< Double_t > &, std::vector< Double_t > &)
computes difference between two vectors
Definition: Tools.cxx:515
Double_t x[n]
Definition: legend1.C:17
~Tools()
destructor
Definition: Tools.cxx:118
static const std::string separator("@@@")
int xmlenginebuffersize()
Definition: Tools.h:278
Bool_t CheckSplines(const TH1 *, const TSpline *)
Definition: Tools.cxx:487
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1231
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:395
std::vector< Int_t > * ParseANNOptionString(TString theOptions, Int_t nvar, std::vector< Int_t > *nodes)
parse option string for ANN methods; default settings (should be defined in theOption string) ...
Definition: Tools.cxx:440
Definition: PDF.h:71
Bool_t AddRawLine(void *node, const char *raw)
XML helpers.
Definition: Tools.cxx:1198
A doubly linked list.
Definition: TList.h:47
static Tools * fgTools
Definition: Tools.h:247
Double_t Mean(Long64_t n, const T *a, const Double_t *w=0)
Definition: Tools.cxx:1694
static void DestroyInstance()
Definition: Tools.cxx:95
MsgLogger * fLogger
Definition: Tools.h:242
EWelcomeMessage
Definition: Tools.h:213
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1241
Service class for 2-Dim histogram classes.
Definition: TH2.h:36
TString ReplaceRegularExpressions(const TString &s, const TString &replace="+")
replace regular expressions: helper function to remove all occurrences of "$!%^&()'<>?= " from a string an...
Definition: Tools.cxx:807
void TMVAWelcomeMessage()
direct output, eg, when starting ROOT session -> no use of Logger here
Definition: Tools.cxx:1310
TXMLEngine * fXMLEngine
Definition: Tools.h:279
Int_t GetIndexMaxElement(std::vector< Double_t > &)
find index of maximum entry in vector
Definition: Tools.cxx:757
2-D histogram with a float per channel (see TH1 documentation)
Definition: TH2.h:256
Bool_t HistoHasEquidistantBins(const TH1 &h)
Definition: Tools.cxx:1494
TMarker * m
Definition: textangle.C:8
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
Definition: Tools.cxx:136
TString GetYTitleWithUnit(const TH1 &h, const TString &unit, Bool_t normalised)
histogramming utility
Definition: Tools.cxx:1057
Tools()
constructor
Definition: Tools.cxx:108
Bool_t HasAttr(void *node, const char *attrname)
add attribute from xml
Definition: Tools.cxx:1104
const char * GetContent(void *node)
XML helpers.
Definition: Tools.cxx:1182
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:295
float xmax
Definition: THbookFile.cxx:93
Double_t ComputeVariance(Double_t sumx2, Double_t sumx, Int_t nx)
Definition: Tools.h:318
TH1 * GetCumulativeDist(TH1 *h)
Definition: Tools.cxx:1759
TString GetXTitleWithUnit(const TString &title, const TString &unit)
histogramming utility
Definition: Tools.cxx:1049
long Long_t
Definition: RtypesCore.h:50
const TString fRegexp
Definition: Tools.h:241
TH1 * projNormTH1F(TTree *theTree, const TString &theVarName, const TString &name, Int_t nbins, Double_t xmin, Double_t xmax, const TString &cut)
projects variable from tree into normalised histogram
Definition: Tools.cxx:378
double Double_t
Definition: RtypesCore.h:55
const TMatrixD * GetCorrelationMatrix(const TMatrixD *covMat)
turns covariance into correlation matrix
Definition: Tools.cxx:337
static const float S
Definition: mandel.cpp:113
void WriteTVectorDToXML(void *node, const char *name, TVectorD *vec)
Definition: Tools.cxx:1267
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
The TH1 histogram class.
Definition: TH1.h:80
void UsefulSortAscending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:547
void ReadFloatArbitraryPrecision(Float_t &val, std::istream &is)
reads a float value with the available precision from a stream
Definition: Tools.cxx:1082
void TMVAVersionMessage(MsgLogger &logger)
prints the TMVA release number and date
Definition: Tools.cxx:1324
#define name(a, b)
Definition: linkTestLib0.cpp:5
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:896
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
void WriteTMatrixDToXML(void *node, const char *name, TMatrixD *mat)
XML helpers.
Definition: Tools.cxx:1251
Abstract ClassifierFactory template that handles arbitrary types.
Double_t GetYMean_binX(const TH2 &, Int_t bin_x)
Compute the mean in Y for a given bin X of a 2D histogram.
Definition: Tools.cxx:651
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
const char * GetName(void *node)
XML helpers.
Definition: Tools.cxx:1190
A TTree object has a header with a name and a title.
Definition: TTree.h:94
Double_t NormVariable(Double_t x, Double_t xmin, Double_t xmax)
normalise to output range: [-1, 1]
Definition: Tools.cxx:127
void TMVACitation(MsgLogger &logger, ECitation citType=kPlainText)
kinds of TMVA citation
Definition: Tools.cxx:1448
TMatrixD * GetSQRootMatrix(TMatrixDSym *symMat)
square-root of symmetric matrix; of course the resulting sqrtMat is also symmetric, but it's easier to treat it as a general matrix
Definition: Tools.cxx:284
void ReadTVectorDFromXML(void *node, const char *name, TVectorD *vec)
Definition: Tools.cxx:1275
TH2F * TransposeHist(const TH2F &)
Transpose quadratic histogram.
Definition: Tools.cxx:666
float value
Definition: math.cpp:443
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:215
const Int_t n
Definition: legend1.C:16
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings ...
Definition: Tools.cxx:1207
Int_t GetIndexMinElement(std::vector< Double_t > &)
find index of minimum entry in vector
Definition: Tools.cxx:774
Double_t GetCorrelationRatio(const TH2F &)
Compute Correlation Ratio of 2D histogram to estimate functional dependency between two variables Aut...
Definition: Tools.cxx:629