Logo ROOT   6.10/09
Reference Guide
Tools.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : Tools *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Global auxiliary applications and data treatment routines *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * U. of Victoria, Canada *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_Tools
30 #define ROOT_TMVA_Tools
31 
32 //////////////////////////////////////////////////////////////////////////
33 // //
34 // Tools (namespace) //
35 // //
36 // Global auxiliary applications and data treatment routines //
37 // //
38 //////////////////////////////////////////////////////////////////////////
39 
40 #include <vector>
41 #include <sstream>
42 #include <iostream>
43 #include <iomanip>
44 #if __cplusplus > 199711L
45 #include <atomic>
46 #endif
47 
48 #include "TXMLEngine.h"
49 
50 #include "TMatrixDSymfwd.h"
51 
52 #include "TMatrixDfwd.h"
53 
54 #include "TVectorDfwd.h"
55 
56 #include "TVectorDfwd.h"
57 
58 #include "TMVA/Types.h"
59 
61 
62 #include "TString.h"
63 
64 #include "TMVA/MsgLogger.h"
65 
66 class TList;
67 class TTree;
68 class TH1;
69 class TH2;
70 class TH2F;
71 class TSpline;
72 class TXMLEngine;
73 
74 namespace TMVA {
75 
76  class Event;
77  class PDF;
78  class MsgLogger;
79 
80  class Tools { // collection of global auxiliary routines; the constructor is private and the single instance is reached through Instance()
81 
82  private:
83 
84  Tools(); // private constructor: callers cannot create instances directly
85 
86  public:
87 
88  // destructor
89  ~Tools();
90 
91  // accessor to single instance
92  static Tools& Instance();
93  static void DestroyInstance();
94 
95 
96  template <typename T> Double_t Mean(Long64_t n, const T *a, const Double_t *w=0); // weighted mean of array a with length n; w is optional
97  template <typename Iterator, typename WeightIterator> Double_t Mean ( Iterator first, Iterator last, WeightIterator w);
98 
99  template <typename T> Double_t RMS(Long64_t n, const T *a, const Double_t *w=0); // standard deviation of array a with length n; w is optional
100  template <typename Iterator, typename WeightIterator> Double_t RMS(Iterator first, Iterator last, WeightIterator w);
101 
102 
103  // simple statistics operations on tree entries
104  void ComputeStat( const std::vector<TMVA::Event*>&,
105  std::vector<Float_t>*, // NOTE(review): listing line 106 is absent from this extract; the index further down lists six Double_t& parameters in total
107  Double_t&, Double_t&, Double_t&, Int_t signalClass,
108  Bool_t norm = kFALSE );
109 
110  // compute variance from sums (sum of squares, plain sum and number of entries)
111  inline Double_t ComputeVariance( Double_t sumx2, Double_t sumx, Int_t nx );
112 
113  // creates histograms normalized to one
114  TH1* projNormTH1F( TTree* theTree, const TString& theVarName,
115  const TString& name, Int_t nbins,
116  Double_t xmin, Double_t xmax, const TString& cut );
117 
118  // normalize histogram by its integral
119  Double_t NormHist( TH1* theHist, Double_t norm = 1.0 );
120 
121  // parser for TString phrase with items separated by a character
122  TList* ParseFormatLine( TString theString, const char * sep = ":" );
123 
124  // parse option string for ANN methods
125  std::vector<Int_t>* ParseANNOptionString( TString theOptions, Int_t nvar,
126  std::vector<Int_t>* nodes );
127 
128  // returns the square-root of a symmetric matrix: symMat = sqrtMat*sqrtMat (NOTE(review): the declaration on listing line 129 is absent from this extract)
130 
131  // returns the covariance matrices of the different classes (and the sum)
132  // given the event sample
133  std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<Event*>& events, Int_t maxCls, VariableTransformBase* transformBase=0 );
134  std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<const Event*>& events, Int_t maxCls, VariableTransformBase* transformBase=0 );
135 
136 
137  // turns covariance into correlation matrix
138  const TMatrixD* GetCorrelationMatrix( const TMatrixD* covMat );
139 
140  // check spline quality by comparison with initial histogram
141  Bool_t CheckSplines( const TH1*, const TSpline* );
142 
143  // normalization of variable output (NOTE(review): the declaration on listing line 144 is absent from this extract)
145 
146  // return separation of two histograms
147  Double_t GetSeparation( TH1* S, TH1* B ) const;
148  Double_t GetSeparation( const PDF& pdfS, const PDF& pdfB ) const;
149 
150  // vector rescaling
151  std::vector<Double_t> MVADiff( std::vector<Double_t>&, std::vector<Double_t>& );
152  void Scale( std::vector<Double_t>&, Double_t );
153  void Scale( std::vector<Float_t>&, Float_t );
154 
155  // re-arrange a vector of arrays (vectors) in a way such that the first array
156  // is ordered, and the other arrays reshuffled accordingly
157  void UsefulSortDescending( std::vector< std::vector<Double_t> >&, std::vector<TString>* vs = 0 );
158  void UsefulSortAscending ( std::vector< std::vector<Double_t> >&, std::vector<TString>* vs = 0 );
159 
160  void UsefulSortDescending( std::vector<Double_t>& );
161  void UsefulSortAscending ( std::vector<Double_t>& );
162 
163  Int_t GetIndexMaxElement ( std::vector<Double_t>& ); // index of the maximum entry in the vector
164  Int_t GetIndexMinElement ( std::vector<Double_t>& ); // index of the minimum entry in the vector
165 
166  // check if input string contains regular expression (NOTE(review): the declaration on listing line 167 is absent from this extract)
168  TString ReplaceRegularExpressions( const TString& s, const TString& replace = "+" );
169 
170  // routines for formatted output -----------------
171  void FormattedOutput( const std::vector<Double_t>&, const std::vector<TString>&,
172  const TString titleVars, const TString titleValues, MsgLogger& logger,
173  TString format = "%+1.3f" );
174  void FormattedOutput( const TMatrixD&, const std::vector<TString>&, MsgLogger& logger );
175  void FormattedOutput( const TMatrixD&, const std::vector<TString>& vert, const std::vector<TString>& horiz,
176  MsgLogger& logger );
177 
178  void WriteFloatArbitraryPrecision( Float_t val, std::ostream& os );
179  void ReadFloatArbitraryPrecision ( Float_t& val, std::istream& is );
180 
181  // for histogramming
182  TString GetXTitleWithUnit( const TString& title, const TString& unit );
183  TString GetYTitleWithUnit( const TH1& h, const TString& unit, Bool_t normalised );
184 
185  // Mutual Information method for non-linear correlations estimates in 2D histogram
186  // Author: Moritz Backes, Geneva (2009)
188 
189  // Correlation Ratio method for non-linear correlations estimates in 2D histogram
190  // Author: Moritz Backes, Geneva (2009)
192  TH2F* TransposeHist ( const TH2F& );
193 
194  // check if "silent" or "verbose" option in configuration string
195  Bool_t CheckForSilentOption ( const TString& ) const;
196  Bool_t CheckForVerboseOption( const TString& ) const;
197 
198  // color information
199  const TString& Color( const TString& );
200 
201  // print welcome message (to be called from, eg, .TMVAlogon)
211 
212  // print TMVA citation (to be called from, eg, .TMVAlogon)
213  enum ECitation { kPlainText = 1, // NOTE(review): the remaining enumerators (listing lines 214-216) are absent from this extract
217 
218  void TMVAWelcomeMessage();
220  void TMVAVersionMessage( MsgLogger& logger );
221  void ROOTVersionMessage( MsgLogger& logger );
222 
223  void TMVACitation( MsgLogger& logger, ECitation citType = kPlainText );
224 
225  // string tools
226 
227  std::vector<TString> SplitString( const TString& theOpt, const char separator ) const;
228 
229  // variables
231  mutable MsgLogger* fLogger;
232  MsgLogger& Log() const { return *fLogger; } // dereferences fLogger without a null check
233 #if __cplusplus > 199711L
234  static std::atomic<Tools*> fgTools; // atomic pointer when compiled as C++11 or later
235 #else
236  static Tools* fgTools; // plain pointer for pre-C++11 compilers
237 #endif
238 
239  // xml tools
240 
243  void WriteTMatrixDToXML ( void* node, const char* name, TMatrixD* mat );
244  void WriteTVectorDToXML ( void* node, const char* name, TVectorD* vec );
245  void ReadTMatrixDFromXML( void* node, const char* name, TMatrixD* mat );
246  void ReadTVectorDFromXML( void* node, const char* name, TVectorD* vec );
248 
249  Bool_t HasAttr ( void* node, const char* attrname );
250  template<typename T>
251  inline void ReadAttr ( void* node, const char* , T& value ); // generic version: streams the attribute text into value
252  void ReadAttr ( void* node, const char* attrname, TString& value );
253  void ReadAttr(void *node, const char *, float &value);
254  void ReadAttr(void *node, const char *, int &value);
255  void ReadAttr(void *node, const char *, short &value);
256 
257  template<typename T>
258  void AddAttr ( void* node, const char* , const T& value, Int_t precision = 16 ); // generic version: formats value as text, then forwards to the const char* overload
259  void AddAttr ( void* node, const char* attrname, const char* value );
260  void* AddChild ( void* parent, const char* childname, const char* content = 0, bool isRootNode = false );
261  Bool_t AddRawLine ( void* node, const char * raw );
262  Bool_t AddComment ( void* node, const char* comment );
263 
264  void* GetParent( void* child);
265  void* GetChild ( void* parent, const char* childname=0 );
266  void* GetNextChild( void* prevchild, const char* childname=0 );
267  const char* GetContent ( void* node );
268  const char* GetName ( void* node );
269 
271  int xmlenginebuffersize() { return 10000000; } // always returns the constant 10000000
273 
274  TH1* GetCumulativeDist( TH1* h); // cumulative distribution of histogram h
275 
276  private:
277 
278  // utilities for correlation ratio
279  Double_t GetYMean_binX( const TH2& , Int_t bin_x ); // mean in Y for bin bin_x in X of a 2D histogram
280 
281  }; // Common tools
282 
283  Tools& gTools(); // global accessor
284 
285 } // namespace TMVA
286 
287 ////////////////////////////////////////////////////////////////////////////////
288 /// read attribute `attrname` of xml node `node` and stream its text into `value`; a missing attribute is reported at kFATAL level and `value` is left untouched
289 
290 template<typename T> void TMVA::Tools::ReadAttr( void* node, const char* attrname, T& value )
291 {
292  // fetch the raw attribute text; a null result is treated as "attribute does not exist"
293  const char *val = xmlengine().GetAttr(node, attrname);
294  if (val == 0) {
295  const char *nodename = xmlengine().GetNodeName(node);
296  Log() << kFATAL << "Trying to read non-existing attribute '" << attrname << "' from xml node '" << nodename << "'" << Endl;
297  return; // guard: never build the stream below from a null pointer, should the fatal log return control
298  }
299  std::stringstream s(val);
300  // coverity[tainted_data_argument]
301  s >> value; // conversion relies on operator>> for T
302 }
303 
304 ////////////////////////////////////////////////////////////////////////////////
305 /// add attribute to xml: `value` is written to a string stream with the requested `precision` and std::scientific, and the resulting text is stored via the const char* overload
306 
307 template<typename T>
308 void TMVA::Tools::AddAttr( void* node, const char* attrname, const T& value, Int_t precision )
309 {
310  std::stringstream formatter;
311  formatter << std::setprecision( precision ) << std::scientific << value;
312  const std::string text = formatter.str(); // keep the formatted text alive while its c_str() is in use
313  AddAttr( node, attrname, text.c_str() );
314 }
315 
316 ////////////////////////////////////////////////////////////////////////////////
317 /// compute variance from given sums: (sumx2 - sumx*sumx/nx)/(nx-1), where sumx2 is the sum of squares, sumx the plain sum and nx the number of entries; returns 0 when nx < 2
318 
319 inline Double_t TMVA::Tools::ComputeVariance( Double_t sumx2, Double_t sumx, Int_t nx )
320 {
321  if (nx<2) return 0; // fewer than two entries: the (nx-1) divisor below would be zero or negative
322  return (sumx2 - ((sumx*sumx)/static_cast<Double_t>(nx)))/static_cast<Double_t>(nx-1);
323 }
324 
325 #endif
const int nx
Definition: kalman.C:16
Bool_t ContainsRegularExpression(const TString &s)
check if regular expression: helper function to search for "$!%^&()'<>?= " in a string ...
Definition: Tools.cxx:795
void Scale(std::vector< Double_t > &, Double_t)
scales double vector
Definition: Tools.cxx:530
static double B[]
static Tools & Instance()
Definition: Tools.cxx:75
TXMLEngine & xmlengine()
Definition: Tools.h:270
void UsefulSortDescending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:575
float xmin
Definition: THbookFile.cxx:93
Double_t RMS(Long64_t n, const T *a, const Double_t *w=0)
Return the Standard Deviation of an array a with length n.
Definition: Tools.cxx:1758
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
long long Long64_t
Definition: RtypesCore.h:69
float Float_t
Definition: RtypesCore.h:53
double T(double x)
Definition: ChebyshevPol.h:34
void ROOTVersionMessage(MsgLogger &logger)
prints the ROOT release number and date
Definition: Tools.cxx:1336
TH1 * h
Definition: legend2.C:5
Base class for spline implementation containing the Draw/Paint methods.
Definition: TSpline.h:20
Basic string class.
Definition: TString.h:129
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition: Tools.cxx:600
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
void WriteFloatArbitraryPrecision(Float_t val, std::ostream &os)
writes a float value with the available precision to a stream
Definition: Tools.cxx:1069
TArc * a
Definition: textangle.C:12
int nbins[3]
static std::string format(double x, double y, int digits, int width)
Bool_t CheckForSilentOption(const TString &) const
check for "silence" option in configuration option string
Definition: Tools.cxx:702
MsgLogger & Log() const
Definition: Tools.h:232
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:308
Bool_t AddComment(void *node, const char *comment)
Definition: Tools.cxx:1143
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1135
Global auxiliary applications and data treatment routines.
Definition: Tools.h:80
void ReadTMatrixDFromXML(void *node, const char *name, TMatrixD *mat)
Definition: Tools.cxx:1287
void * GetParent(void *child)
get parent node
Definition: Tools.cxx:1151
Bool_t CheckForVerboseOption(const TString &) const
check if verbosity "V" set in option
Definition: Tools.cxx:719
std::vector< Double_t > MVADiff(std::vector< Double_t > &, std::vector< Double_t > &)
computes difference between two vectors
Definition: Tools.cxx:517
Double_t x[n]
Definition: legend1.C:17
~Tools()
destructor
Definition: Tools.cxx:113
int xmlenginebuffersize()
Definition: Tools.h:271
Bool_t CheckSplines(const TH1 *, const TSpline *)
check quality of splining by comparing splines and histograms in each bin
Definition: Tools.cxx:490
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1161
TString StringFromInt(Long_t i)
string tools
Definition: Tools.cxx:1234
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:394
std::vector< Int_t > * ParseANNOptionString(TString theOptions, Int_t nvar, std::vector< Int_t > *nodes)
parse option string for ANN methods default settings (should be defined in theOption string) ...
Definition: Tools.cxx:454
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
Bool_t AddRawLine(void *node, const char *raw)
XML helpers.
Definition: Tools.cxx:1201
const char * GetNodeName(XMLNodePointer_t xmlnode)
returns name of xmlnode
Definition: TXMLEngine.cxx:930
A doubly linked list.
Definition: TList.h:43
static Tools * fgTools
Definition: Tools.h:236
Double_t Mean(Long64_t n, const T *a, const Double_t *w=0)
Return the weighted mean of an array a with length n.
Definition: Tools.cxx:1702
static void DestroyInstance()
Definition: Tools.cxx:90
RooArgSet S(const RooAbsArg &v1)
MsgLogger * fLogger
Definition: Tools.h:231
EWelcomeMessage
Definition: Tools.h:202
Linear interpolation class.
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1244
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
TString ReplaceRegularExpressions(const TString &s, const TString &replace="+")
replace regular expressions: helper function to remove all occurrences of "$!%^&()'<>?= " from a string and replace all ::,$,*,/,+,- with M,S,T,D,P,M respectively
Definition: Tools.cxx:809
void TMVAWelcomeMessage()
direct output, eg, when starting ROOT session -> no use of Logger here
Definition: Tools.cxx:1313
TXMLEngine * fXMLEngine
Definition: Tools.h:272
Int_t GetIndexMaxElement(std::vector< Double_t > &)
find index of maximum entry in vector
Definition: Tools.cxx:759
2-D histogram with a float per channel (see TH1 documentation)
Definition: TH2.h:249
Bool_t HistoHasEquidistantBins(const TH1 &h)
Definition: Tools.cxx:1498
TMarker * m
Definition: textangle.C:8
TString GetYTitleWithUnit(const TH1 &h, const TString &unit, Bool_t normalised)
histogramming utility
Definition: Tools.cxx:1059
Tools()
constructor
Definition: Tools.cxx:103
Bool_t HasAttr(void *node, const char *attrname)
add attribute from xml
Definition: Tools.cxx:1105
const char * GetContent(void *node)
XML helpers.
Definition: Tools.cxx:1185
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:290
float xmax
Definition: THbookFile.cxx:93
Tools & gTools()
Double_t ComputeVariance(Double_t sumx2, Double_t sumx, Int_t nx)
compute variance from given sums
Definition: Tools.h:319
TH1 * GetCumulativeDist(TH1 *h)
get the cumulative distribution of a histogram
Definition: Tools.cxx:1771
TString GetXTitleWithUnit(const TString &title, const TString &unit)
histogramming utility
Definition: Tools.cxx:1051
const Bool_t kFALSE
Definition: RtypesCore.h:92
long Long_t
Definition: RtypesCore.h:50
const TString fRegexp
Definition: Tools.h:230
TH1 * projNormTH1F(TTree *theTree, const TString &theVarName, const TString &name, Int_t nbins, Double_t xmin, Double_t xmax, const TString &cut)
projects variable from tree into normalised histogram
Definition: Tools.cxx:377
double Double_t
Definition: RtypesCore.h:55
const char * GetAttr(XMLNodePointer_t xmlnode, const char *name)
returns value of attribute for xmlnode
Definition: TXMLEngine.cxx:460
const TMatrixD * GetCorrelationMatrix(const TMatrixD *covMat)
turns covariance into correlation matrix
Definition: Tools.cxx:336
void WriteTVectorDToXML(void *node, const char *name, TVectorD *vec)
Definition: Tools.cxx:1270
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1173
The TH1 histogram class.
Definition: TH1.h:56
void UsefulSortAscending(std::vector< std::vector< Double_t > > &, std::vector< TString > *vs=0)
sort 2D vector (AND in parallel a TString vector) in such a way that the "first vector is sorted" and...
Definition: Tools.cxx:549
void ReadFloatArbitraryPrecision(Float_t &val, std::istream &is)
reads a float value with the available precision from a stream
Definition: Tools.cxx:1084
void TMVAVersionMessage(MsgLogger &logger)
prints the TMVA release number and date
Definition: Tools.cxx:1327
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
void ComputeStat(const std::vector< TMVA::Event *> &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:214
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:898
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
void WriteTMatrixDToXML(void *node, const char *name, TMatrixD *mat)
XML helpers.
Definition: Tools.cxx:1254
Abstract ClassifierFactory template that handles arbitrary types.
Double_t GetYMean_binX(const TH2 &, Int_t bin_x)
Compute the mean in Y for a given bin X of a 2D histogram.
Definition: Tools.cxx:653
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:412
std::vector< TString > SplitString(const TString &theOpt, const char separator) const
splits the option string at 'separator' and fills the list 'splitV' with the primitive strings ...
Definition: Tools.cxx:1210
const char * GetName(void *node)
XML helpers.
Definition: Tools.cxx:1193
A TTree object has a header with a name and a title.
Definition: TTree.h:78
Double_t NormVariable(Double_t x, Double_t xmin, Double_t xmax)
normalise to output range: [-1, 1]
Definition: Tools.cxx:122
void TMVACitation(MsgLogger &logger, ECitation citType=kPlainText)
kinds of TMVA citation
Definition: Tools.cxx:1452
TMatrixD * GetSQRootMatrix(TMatrixDSym *symMat)
square-root of symmetric matrix; of course the resulting sqrtMat is also symmetric, but it's easier to treat it as a general matrix
Definition: Tools.cxx:283
Definition: first.py:1
std::vector< TMatrixDSym * > * CalcCovarianceMatrices(const std::vector< Event *> &events, Int_t maxCls, VariableTransformBase *transformBase=0)
compute covariance matrices
Definition: Tools.cxx:1525
void ReadTVectorDFromXML(void *node, const char *name, TVectorD *vec)
Definition: Tools.cxx:1278
TH2F * TransposeHist(const TH2F &)
Transpose quadratic histogram.
Definition: Tools.cxx:668
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
const Int_t n
Definition: legend1.C:16
Int_t GetIndexMinElement(std::vector< Double_t > &)
find index of minimum entry in vector
Definition: Tools.cxx:776
Double_t GetCorrelationRatio(const TH2F &)
Compute Correlation Ratio of 2D histogram to estimate functional dependency between two variables Aut...
Definition: Tools.cxx:631
Double_t GetSeparation(TH1 *S, TH1 *B) const
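compute "separation" defined as $\langle S^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)} \, dx$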
Definition: Tools.cxx:133