Logo ROOT   6.14/05
Reference Guide
TRDataFrame.h
Go to the documentation of this file.
1 // @(#)root/r:$Id$
2 // Author: Omar Zapata 30/05/2015
3 
4 
5 /*************************************************************************
6  * Copyright (C) 2013-2015, Omar Andres Zapata Mesa *
7  * All rights reserved. *
8  * *
9  * For the licensing terms see $ROOTSYS/LICENSE. *
10  * For the list of contributors see $ROOTSYS/README/CREDITS. *
11  *************************************************************************/
12 #ifndef ROOT_R_TRDataFrame
13 #define ROOT_R_TRDataFrame
14 
15 #include <RExports.h>
16 
17 #include <TRObject.h>
18 
19 #include <TRFunctionImport.h>
20 
21 
22 namespace ROOT {
23  namespace R {
24 
25  /**
26  \class TRDataFrame
27 
28  This is a class to create DataFrames from ROOT to R
29  <center><h2>TRDataFrame class</h2></center>
30 
31  DataFrame is a very important datatype in R, and ROOTR provides a class to manipulate<br>
32  dataframes called TRDataFrame, with many useful overloaded operators for working with TRDataFrame objects<br>
33  in a way similar to the R environment, but from C++ in ROOT.<br>
34  Example:<br>
35  <br>
36  Let's create the data needed to play with the dataframe features<br>
37 
38  <h2>Creating variables</h2><br>
39  \code{.cpp}
40  TVectorD v1(3);
41  std::vector<Double_t> v2(3);
42  std::array<Int_t,3> v3{ {1,2,3} };
43  std::list<std::string> names;
44  \endcode
45 
46  <h2> Assigning values </h2><br>
47  \code{.cpp}
48  v1[0]=1;
49  v1[1]=2;
50  v1[2]=3;
51 
52  v2[0]=0.101;
53  v2[1]=0.202;
54  v2[2]=0.303;
55 
56  names.push_back("v1");
57  names.push_back("v2");
58  names.push_back("v3");
59 
60  ROOT::R::TRInterface &r=ROOT::R::TRInterface::Instance();
61  \endcode
62 
63  In R, every column of a dataframe has an associated label;
64  in ROOTR you can attach the same labels using ROOT::R::Label to create a TRDataFrame where your data
65  has an associated label.
66  <h2> Creating dataframe object with its labels</h2> <br>
67  \code{.cpp}
68  using namespace ROOT::R;
69  TRDataFrame df1(Label["var1"]=v1,Label["var2"]=v2,Label["var3"]=v3,Label["strings"]=names);
70  \endcode
71 
72  <h2>Passing dataframe to R's environment</h2><br>
73  \code{.cpp}
74  r["df1"]<<df1;
75  r<<"print(df1)";
76  \endcode
77  Output
78  \code
79  var1 var2 var3 strings
80  1 1 0.101 1 v1
81  2 2 0.202 2 v2
82  3 3 0.303 3 v3
83  \endcode
84 
85  Manipulating data between dataframes
86  <h2>Adding columns to a dataframe</h2><br>
87  \code{.cpp}
88  TVectorD v4(3);
89  //filling the vector from R's environment
90  r["c(-1,-2,-3)"]>>v4;
91  //adding a new column to df1 with name var4
92  df1["var4"]=v4;
93  //updating df1 in R's environment
94  r["df1"]<<df1;
95  //printing df1
96  r<<"print(df1)";
97  \endcode
98 
99  Output
100  var1 var2 var3 strings var4
101  1 1 0.101 1 v1 -1
102  2 2 0.202 2 v2 -2
103  3 3 0.303 3 v3 -3
104 
105  <h2>Getting dataframe from R's environment</h2><br>
106  \code{.cpp}
107  ROOT::R::TRDataFrame df2;
108 
109  r<<"df2<-data.frame(v1=c(0.1,0.2,0.3),v2=c(3,2,1))";
110  r["df2"]>>df2;
111 
112  TVectorD v(3);
113  df2["v1"]>>v;
114  v.Print();
115 
116  df2["v2"]>>v;
117  v.Print();
118  \endcode
119 
120  Output
121  \code
122  Vector (3) is as follows
123 
124  | 1 |
125  ------------------
126  0 |0.1
127  1 |0.2
128  2 |0.3
129 
130  Vector (3) is as follows
131 
132  | 1 |
133  ------------------
134  0 |3
135  1 |2
136  2 |1
137  \endcode
138 
139  <h2>Working with columns between dataframes</h2><br>
140  \code{.cpp}
141  df2["v3"]<<df1["strings"];
142 
143  //updating df2 in R's environment
144  r["df2"]<<df2;
145  r<<"print(df2)";
146  \endcode
147  Output
148  \code
149  v1 v2 v3
150  1 0.1 3 v1
151  2 0.2 2 v2
152  3 0.3 1 v3
153  \endcode
154 
155  <h2>Working with columns between dataframes</h2><br>
156  \code{.cpp}
157  //passing values from column v3 of df2 to var1 of df1
158  df2["v3"]>>df1["var1"];
159  //updating df1 in R's environment
160  r["df1"]<<df1;
161  r<<"print(df1)";
162  \endcode
163  Output
164  \code
165  var1 var2 var3 strings var4
166  1 v1 0.101 1 v1 -1
167  2 v2 0.202 2 v2 -2
168  3 v3 0.303 3 v3 -3
169  \endcode
170  <h2>Users Guide </h2>
171  <a href="http://oproject.org/tiki-index.php?page=ROOT+R+Users+Guide"> http://oproject.org/tiki-index.php?page=ROOT+R+Users+Guide</a><br>
172  <a href="https://root.cern.ch/drupal/content/how-use-r-root-root-r-interface"> https://root.cern.ch/drupal/content/how-use-r-root-root-r-interface</a>
173  @ingroup R
174  */
175 
176 
177  class TRDataFrame: public TObject {
178  friend class TRInterface;
179  friend SEXP Rcpp::wrap<TRDataFrame>(const TRDataFrame &f);
180  protected:
181  Rcpp::DataFrame df; //internal Rcpp::DataFrame
182  public:
183  //Proxy class that enables operators for assignment, e.g. df["name"]>>object
184  class Binding {
185  friend class TRDataFrame;
186  public:
187  /**
188  Construct an instance of the nested Binding class, which provides the operator facilities
189  \param _df Rcpp::DataFrame (the internal one from TRDataFrame)
190  \param name string to use in assignments
191  */
192  Binding(Rcpp::DataFrame &_df, TString name): fName(name), fDf(_df) {}
193  /**
194  Copy constructor for the nested Binding class
195  \param obj Binding object holding the Rcpp::DataFrame reference and the name string
196  */
197  Binding(const Binding &obj): fName(obj.fName), fDf(obj.fDf) {}
198  /**
199  Template method for the assignment operator
200  \param var any R wrappable datatype
201  */
202  template <class T> Binding operator=(T var)
203  {
204  Int_t size = fDf.size(), i = 0;
205  Rcpp::CharacterVector names = fDf.attr("names");
206  Bool_t found = false;
207  while (i < size) {
208  if (names[i] == fName.Data()) {
209  found = true;
210  break;
211  }
212  i++;
213  }
214  if (found) fDf[fName.Data()] = var;
215  else {
216  if (size == 0) {
217  fDf = Rcpp::DataFrame::create(ROOT::R::Label[fName.Data()] = var);
218  } else {
219  Rcpp::List nDf(size + 1);
220  Rcpp::CharacterVector nnames(size + 1);
221  for (i = 0; i < size; i++) {
222  nDf[i] = fDf[i] ;
223  nnames[i] = names[i];
224  }
225  nDf[size] = var;
226  nnames[size] = fName.Data();
227  nDf.attr("class") = fDf.attr("class") ;
228  nDf.attr("row.names") = fDf.attr("row.names") ;
229  nDf.attr("names") = nnames ;
230  fDf = nDf;
231  }
232  }
233  return *this;
234  }
235  /**
236  Assignment operator for the Binding class
237  \param obj other Binding object
238  */
240  {
241  Int_t size = fDf.size(), i = 0;
242  Rcpp::CharacterVector names = fDf.attr("names");
243  Bool_t found = false;
244  while (i < size) {
245  if (names[i] == fName.Data()) {
246  found = true;
247  break;
248  }
249  i++;
250  }
251  if (found) fDf[fName.Data()] = obj.fDf[obj.fName.Data()];
252  else {
253  Rcpp::List nDf(size + 1);
254  Rcpp::CharacterVector nnames(size + 1);
255  for (i = 0; i < size; i++) {
256  nDf[i] = obj.fDf[i] ;
257  nnames[i] = names[i];
258  }
259  nDf[size] = obj.fDf[obj.fName.Data()];
260  nnames[size] = fName.Data();
261 
262  nDf.attr("class") = obj.fDf.attr("class") ;
263  nDf.attr("row.names") = obj.fDf.attr("row.names") ;
264  nDf.attr("names") = nnames ;
265  fDf = nDf;
266  }
267 
268  return *this;
269  }
270 
271  /**
272  Template method for operator >> that lets you use dataframes like streams
273  example: df["v"]>>vector;
274  \param var any datatype that can be assigned from dataframe label
275  */
276  template <class T> Binding &operator >>(T &var)
277  {
278  var = Rcpp::as<T>(fDf[fName.Data()]);
279  return *this;
280  }
282  {
283  var.fDf[var.fName.Data()] = fDf[fName.Data()];
284  return var;
285  }
286 
287  /**
288  Template method for operator << that lets you use dataframes like streams
289  example: df["v"]<<vector;
290  \param var any datatype that can be assigned to dataframe label
291  */
292  template <class T> Binding &operator <<(T var)
293  {
294  Int_t size = fDf.size(), i = 0;
295  Rcpp::CharacterVector names = fDf.attr("names");
296  Bool_t found = false;
297  while (i < size) {
298  if (names[i] == fName.Data()) {
299  found = true;
300  break;
301  }
302  i++;
303  }
304  if (found) fDf[fName.Data()] = var;
305  else {
306  Rcpp::List nDf(size + 1);
307  Rcpp::CharacterVector nnames(size + 1);
308  for (i = 0; i < size; i++) {
309  nDf[i] = fDf[i] ;
310  nnames[i] = names[i];
311  }
312  nDf[size] = var;
313  nnames[size] = fName.Data();
314 
315  nDf.attr("class") = fDf.attr("class") ;
316  nDf.attr("row.names") = fDf.attr("row.names") ;
317  nDf.attr("names") = nnames ;
318  fDf = nDf;
319  }
320  return *this;
321  }
322  template <class T> operator T()
323  {
324  return Rcpp::as<T>(fDf[fName.Data()]);
325  }
326  template <class T> operator T() const
327  {
328  return Rcpp::as<T>(fDf[fName.Data()]);
329  }
330 
331  private:
332  TString fName; //name of label
333  Rcpp::DataFrame &fDf;//internal dataframe
334  };
335 
336  /**
337  Default TRDataFrame constructor
338  */
339  TRDataFrame();
340  /**
341  TRDataFrame constructor
342  \param obj raw R object that can be cast to a DataFrame
343  */
344  TRDataFrame(SEXP obj)
345  {
346  df = Rcpp::as<Rcpp::DataFrame>(obj);
347  }
348  /**
349  TRDataFrame copy constructor
350  \param _df other TRDataFrame
351  */
352  TRDataFrame(const TRDataFrame &_df);
353  /**
354  TRDataFrame constructor for Rcpp::DataFrame
355  \param _df raw dataframe from Rcpp
356  */
357  TRDataFrame(const Rcpp::DataFrame &_df): df(_df) {};
358 
359 #include <TRDataFrame__ctors.h>
360 
361  Binding operator[](const TString &name);
362 
363  TRDataFrame &operator=(TRDataFrame &obj)
364  {
365  df = obj.df;
366  return *this;
367  }
368 
369  TRDataFrame &operator=(TRDataFrame obj)
370  {
371  df = obj.df;
372  return *this;
373  }
374 
375  TRDataFrame &operator=(SEXP obj)
376  {
377  df = Rcpp::as<Rcpp::DataFrame>(obj);
378  return *this;
379  }
380 
381  operator SEXP()
382  {
383  return df;
384  }
385 
386  operator SEXP() const
387  {
388  return df;
389  }
390 
391  /**
392  Method to get the number of columns
393  \return number of cols
394  */
395  Int_t GetNcols() { return df.size(); }
396  /**
397  Method to get the number of rows
398  \return number of rows
399  */
400  Int_t GetNrows() { return df.nrows(); }
401  /**
402  Method to get labels of dataframe
403  \return column names
404  */
406  {
407  Rcpp::CharacterVector names = df.attr("names");
408  TVectorString rnames(GetNcols());
409  for (Int_t i = 0; i < GetNcols(); i++) rnames[i] = names[i];
410  return rnames;
411  }
412 
413  /**
414  Method to get dataframe as matrix
415  \note only works on numerical dataframes; if some column is a string or another type it will fail
416  \return TMatrixT with the given template data type
417  */
418  template<class T> TMatrixT<T> AsMatrix()
419  {
420  TRFunctionImport asMatrix("as.matrix");
421  return Rcpp::as<TMatrixT<T> >(asMatrix(df));
422  }
423 
424  /**
425  Method to print the dataframe to stdout, or a single column given its label
426  \param label name of the column to print
427  */
428  void Print(const Char_t *label = "")
429  {
430  TRFunctionImport print("print");
431  if (label && !label[0]) {
432  // label is ""
433  print(df);
434  } else {
435  print(df[label]);
436  }
437  }
438  ClassDef(TRDataFrame, 0) //
439  };
440  }
441 }
442 
443 
444 
445 #endif
TVectorString GetColNames()
Method to get labels of dataframe.
Definition: TRDataFrame.h:405
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
void Print(const Char_t *label="")
Method to print the dataframe in stdout or a column given the label.
Definition: TRDataFrame.h:428
double T(double x)
Definition: ChebyshevPol.h:34
Binding & operator<<(T var)
Template method for operator << that lets to use dataframes like streams example: df["v"]<<vector;...
Definition: TRDataFrame.h:292
Binding operator=(Binding obj)
method for operator assignation of Binding class
Definition: TRDataFrame.h:239
Basic string class.
Definition: TString.h:131
#define f(i)
Definition: RSha256.hxx:104
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
Binding operator[](const TString &name)
Definition: TRDataFrame.cxx:26
TMatrixT< T > AsMatrix()
Method to get dataframe as matrix.
Definition: TRDataFrame.h:418
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
Binding(Rcpp::DataFrame &_df, TString name)
Construct an instance of the nested Binding class, which provides the operator facilities.
Definition: TRDataFrame.h:192
TMatrixT.
Definition: TMatrixDfwd.h:22
TRDataFrame(SEXP obj)
TDataFrame constructor.
Definition: TRDataFrame.h:344
TRDataFrame & operator=(TRDataFrame obj)
Definition: TRDataFrame.h:369
Rcpp::DataFrame df
Definition: TRDataFrame.h:181
#define ClassDef(name, id)
Definition: Rtypes.h:320
Binding operator=(T var)
template method for operator assignation
Definition: TRDataFrame.h:202
Binding(const Binding &obj)
Copy constructor for Binding nestead class.
Definition: TRDataFrame.h:197
Int_t GetNcols()
Method to get the number of columns.
Definition: TRDataFrame.h:395
This is a class to pass functions from ROOT to R.
TRDataFrame & operator=(SEXP obj)
Definition: TRDataFrame.h:375
Int_t GetNrows()
Method to get the number of rows.
Definition: TRDataFrame.h:400
std::vector< TString > TVectorString
Definition: RExports.h:51
void DataFrame()
Definition: DataFrame.C:9
Binding & operator>>(T &var)
Template method for operator >> that lets to use dataframes like streams example: df["v"]>>vector;...
Definition: TRDataFrame.h:276
Mother of all ROOT objects.
Definition: TObject.h:37
char Char_t
Definition: RtypesCore.h:29
Rcpp::internal::NamedPlaceHolder Label
Definition: RExports.cxx:14
TRDataFrame(const Rcpp::DataFrame &_df)
TDataFrame constructor for Rcpp::DataFrame.
Definition: TRDataFrame.h:357
char name[80]
Definition: TGX11.cxx:109
TRDataFrame & operator=(TRDataFrame &obj)
Definition: TRDataFrame.h:363
This is a class to create DataFrames from ROOT to R
Definition: TRDataFrame.h:177
const char * Data() const
Definition: TString.h:364