Logo ROOT  
Reference Guide
TRDataFrame.h
Go to the documentation of this file.
1// @(#)root/r:$Id$
2// Author: Omar Zapata 30/05/2015
3
4
5/*************************************************************************
6 * Copyright (C) 2013-2015, Omar Andres Zapata Mesa *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12#ifndef ROOT_R_TRDataFrame
13#define ROOT_R_TRDataFrame
14
15#include <RExports.h>
16
17#include <TRObject.h>
18
19#include <TRFunctionImport.h>
20
21
22namespace ROOT {
23 namespace R {
24
25 /**
26 \class TRDataFrame
27
28 This is a class to create DataFrames from ROOT to R
29 <center><h2>TRDataFrame class</h2></center>
30
31 The data frame is a very important datatype in R, and ROOTR provides a class to manipulate<br>
32 data frames, called TRDataFrame, with many useful overloaded operators for working with TRDataFrame objects<br>
33 in a way similar to the R environment, but from C++ in ROOT.<br>
34 Example:<br>
35 <br>
36 Let's create the data needed to explore the dataframe features<br>
37
38 <h2>Creating variables</h2><br>
39 \code{.cpp}
40 TVectorD v1(3);
41 std::vector<Double_t> v2(3);
42 std::array<Int_t,3> v3{ {1,2,3} };
43 std::list<std::string> names;
44 \endcode
45
46 <h2> Assigning values </h2><br>
47 \code{.cpp}
48 v1[0]=1;
49 v1[1]=2;
50 v1[2]=3;
51
52 v2[0]=0.101;
53 v2[1]=0.202;
54 v2[2]=0.303;
55
56 names.push_back("v1");
57 names.push_back("v2");
58 names.push_back("v3");
59
60 ROOT::R::TRInterface &r=ROOT::R::TRInterface::Instance();
61 \endcode
62
63 In R, every column of a dataframe has an associated label;
64 in ROOTR you can attach the same labels using ROOT::R::Label to create a TRDataFrame whose data
65 columns each have an associated label.
66 <h2> Creating dataframe object with its labels</h2> <br>
67 \code{.cpp}
68 using namespace ROOT::R;
69 TRDataFrame df1(Label["var1"]=v1,Label["var2"]=v2,Label["var3"]=v3,Label["strings"]=names);
70 \endcode
71
72 <h2>Passing dataframe to R's environment</h2><br>
73 \code{.cpp}
74 r["df1"]<<df1;
75 r<<"print(df1)";
76 \endcode
77 Output
78 \code
79 var1 var2 var3 strings
80 1 1 0.101 1 v1
81 2 2 0.202 2 v2
82 3 3 0.303 3 v3
83 \endcode
84
85 Manipulating data between dataframes
86 <h2>Adding columns to dataframe</h2><br>
87 \code{.cpp}
88 TVectorD v4(3);
89 //filling the vector from R's environment
90 r["c(-1,-2,-3)"]>>v4;
91 //adding new column to df1 with name var4
92 df1["var4"]=v4;
93 //updating df1 in R's environment
94 r["df1"]<<df1;
95 //printing df1
96 r<<"print(df1)";
97 \endcode
98
99 Output
100 var1 var2 var3 strings var4
101 1 1 0.101 1 v1 -1
102 2 2 0.202 2 v2 -2
103 3 3 0.303 3 v3 -3
104
105 <h2>Getting dataframe from R's environment</h2><br>
106 \code{.cpp}
107 ROOT::R::TRDataFrame df2;
108
109 r<<"df2<-data.frame(v1=c(0.1,0.2,0.3),v2=c(3,2,1))";
110 r["df2"]>>df2;
111
112 TVectorD v(3);
113 df2["v1"]>>v;
114 v.Print();
115
116 df2["v2"]>>v;
117 v.Print();
118 \endcode
119
120 Output
121 \code
122 Vector (3) is as follows
123
124 | 1 |
125 ------------------
126 0 |0.1
127 1 |0.2
128 2 |0.3
129
130 Vector (3) is as follows
131
132 | 1 |
133 ------------------
134 0 |3
135 1 |2
136 2 |1
137 \endcode
138
139 <h2>Working with columns between dataframes</h2><br>
140 \code{.cpp}
141 df2["v3"]<<df1["strings"];
142
143 //updating df2 in R's environment
144 r["df2"]<<df2;
145 r<<"print(df2)";
146 \endcode
147 Output
148 \code
149 v1 v2 v3
150 1 0.1 3 v1
151 2 0.2 2 v2
152 3 0.3 1 v3
153 \endcode
154
155 <h2>Working with columns between dataframes</h2><br>
156 \code{.cpp}
157 //passing values from column v3 of df2 to var1 of df1
158 df2["v3"]>>df1["var1"];
159 //updating df1 in R's environment
160 r["df1"]<<df1;
161 r<<"print(df1)";
162 \endcode
163 Output
164 \code
165 var1 var2 var3 strings var4
166 1 v1 0.101 1 v1 -1
167 2 v2 0.202 2 v2 -2
168 3 v3 0.303 3 v3 -3
169 \endcode
170 <h2>Users Guide </h2>
171 <a href="https://oproject.org/pages/ROOT%20R%20Users%20Guide"> https://oproject.org/pages/ROOT R Users Guide</a><br>
172 @ingroup R
173 */
174
175
176 class TRDataFrame: public TObject {
177 friend class TRInterface;
178 friend SEXP Rcpp::wrap<TRDataFrame>(const TRDataFrame &f);
179 protected:
180 Rcpp::DataFrame df; //internal Rcpp::DataFrame
181 public:
182 //Proxy class that enables operator-based assignment, e.g. df["name"]>>object
183 class Binding {
184 friend class TRDataFrame;
185 public:
186 /**
187 Construct an instance of the nested Binding class, which provides the operator facilities
188 \param _df Rcpp::DataFrame (the internal one from TRDataFrame)
189 \param name string to use in assignments
190 */
192 /**
193 Copy constructor for the nested Binding class; the copy refers to the same Rcpp::DataFrame as obj (fDf is a reference member)
194 \param obj Binding holding the Rcpp::DataFrame reference and the column name to copy
195 */
196 Binding(const Binding &obj): fName(obj.fName), fDf(obj.fDf) {}
197 /**
198 Template assignment operator: stores var as the column named fName, overwriting that column if it already exists and appending it as a new last column otherwise
199 \param var any R wrappable datatype
200 */
201 template <class T> Binding operator=(T var)
202 {
203 Int_t size = fDf.size(), i = 0;
204 Rcpp::CharacterVector names = fDf.attr("names");
205 Bool_t found = false;
206 while (i < size) { // linear scan of the current column names for fName
207 if (names[i] == fName.Data()) {
208 found = true;
209 break;
210 }
211 i++;
212 }
213 if (found) fDf[fName.Data()] = var; // column already present: overwrite it
214 else {
215 if (size == 0) {
216 fDf = Rcpp::DataFrame::create(ROOT::R::Label[fName.Data()] = var); // frame has no columns: build a one-column frame
217 } else {
218 Rcpp::List nDf(size + 1); // list with one extra slot for the new column
219 Rcpp::CharacterVector nnames(size + 1);
220 for (i = 0; i < size; i++) {
221 nDf[i] = fDf[i] ;
222 nnames[i] = names[i];
223 }
224 nDf[size] = var;
225 nnames[size] = fName.Data();
226 nDf.attr("class") = fDf.attr("class") ; // reuse the class and row.names attributes of the current frame
227 nDf.attr("row.names") = fDf.attr("row.names") ;
228 nDf.attr("names") = nnames ;
229 fDf = nDf; // replace the referenced frame with the extended one
230 }
231 }
232 return *this; // return type is Binding (by value), so the caller receives a copy
233 }
234 /**
235 Assignment from another Binding: copies the column obj refers to into the column named fName of this Binding's dataframe, overwriting it if it exists and appending it as a new last column otherwise
236 \param obj other Binding object
237 */
238 Binding operator=(Binding obj)
239 {
240 Int_t size = fDf.size(), i = 0;
241 Rcpp::CharacterVector names = fDf.attr("names");
242 Bool_t found = false;
243 while (i < size) { // linear scan of this frame's column names for fName
244 if (names[i] == fName.Data()) {
245 found = true;
246 break;
247 }
248 i++;
249 }
250 if (found) fDf[fName.Data()] = obj.fDf[obj.fName.Data()]; // column already present: overwrite it
251 else {
252 Rcpp::List nDf(size + 1); // list with one extra slot for the new column
253 Rcpp::CharacterVector nnames(size + 1);
254 for (i = 0; i < size; i++) {
255 nDf[i] = fDf[i] ; // keep this frame's own columns: size and names both come from fDf, not from obj.fDf
256 nnames[i] = names[i];
257 }
258 nDf[size] = obj.fDf[obj.fName.Data()];
259 nnames[size] = fName.Data();
260
261 nDf.attr("class") = obj.fDf.attr("class") ; // class and row.names are taken from obj's frame
262 nDf.attr("row.names") = obj.fDf.attr("row.names") ;
263 nDf.attr("names") = nnames ;
264 fDf = nDf; // replace the referenced frame with the extended one
265 }
266
267 return *this; // return type is Binding (by value), so the caller receives a copy
268 }
269
270 /**
271 Template operator >> that lets dataframes be used like streams: converts the column named fName to T with Rcpp::as and assigns the result to var
272 example: df["v"]>>vector;
273 \param var object of any datatype that can be assigned from the dataframe column; it is overwritten
274 */
275 template <class T> Binding &operator >>(T &var)
276 {
277 var = Rcpp::as<T>(fDf[fName.Data()]);
278 return *this; // returns a reference to this Binding so the expression can be chained
279 }
281 {
282 var.fDf[var.fName.Data()] = fDf[fName.Data()];
283 return var;
284 }
285
286 /**
287 Template operator << that lets dataframes be used like streams: stores var as the column named fName, overwriting that column if it exists and appending it as a new last column otherwise
288 example: df["v"]<<vector;
289 \param var any datatype that can be assigned to dataframe label
290 */
291 template <class T> Binding &operator <<(T var)
292 {
293 Int_t size = fDf.size(), i = 0;
294 Rcpp::CharacterVector names = fDf.attr("names");
295 Bool_t found = false;
296 while (i < size) { // linear scan of the current column names for fName
297 if (names[i] == fName.Data()) {
298 found = true;
299 break;
300 }
301 i++;
302 }
303 if (found) fDf[fName.Data()] = var; // column already present: overwrite it
304 else if (size == 0) { fDf = Rcpp::DataFrame::create(ROOT::R::Label[fName.Data()] = var); } else { // no columns yet: build a one-column frame, as the template operator= does, instead of reusing the empty frame's row.names
305 Rcpp::List nDf(size + 1); // list with one extra slot for the new column
306 Rcpp::CharacterVector nnames(size + 1);
307 for (i = 0; i < size; i++) {
308 nDf[i] = fDf[i] ;
309 nnames[i] = names[i];
310 }
311 nDf[size] = var;
312 nnames[size] = fName.Data();
313
314 nDf.attr("class") = fDf.attr("class") ; // reuse the class and row.names attributes of the current frame
315 nDf.attr("row.names") = fDf.attr("row.names") ;
316 nDf.attr("names") = nnames ;
317 fDf = nDf; // replace the referenced frame with the extended one
318 }
319 return *this; // returns a reference to this Binding so the expression can be chained
320 }
321 template <class T> operator T() // implicit conversion: the column named fName converted to T
322 {
323 return Rcpp::as<T>(fDf[fName.Data()]); // conversion is delegated to Rcpp::as<T>
324 }
325 template <class T> operator T() const // const overload of the implicit conversion of the column named fName to T
326 {
327 return Rcpp::as<T>(fDf[fName.Data()]); // conversion is delegated to Rcpp::as<T>
328 }
329
330 private:
331 TString fName; //name of label
332 Rcpp::DataFrame &fDf;//internal dataframe
333 };
334
335 /**
336 Default TRDataFrame constructor
337 */
338 TRDataFrame();
339 /**
340 TRDataFrame constructor from a raw R object; the object is converted with Rcpp::as<Rcpp::DataFrame>
341 \param obj raw R object that can be cast to a DataFrame
342 */
343 TRDataFrame(SEXP obj)
344 {
345 df = Rcpp::as<Rcpp::DataFrame>(obj);
346 }
347 /**
348 TRDataFrame copy constructor
349 \param _df other TRDataFrame
350 */
351 TRDataFrame(const TRDataFrame &_df);
352 /**
353 TRDataFrame constructor from an Rcpp::DataFrame; initializes the internal df member from _df
354 \param _df raw dataframe from Rcpp
355 */
356 TRDataFrame(const Rcpp::DataFrame &_df): df(_df) {};
357
358#include <TRDataFrame__ctors.h>
359
361
363 {
364 df = obj.df;
365 return *this;
366 }
367
369 {
370 df = obj.df;
371 return *this;
372 }
373
375 {
376 df = Rcpp::as<Rcpp::DataFrame>(obj);
377 return *this;
378 }
379
380 operator SEXP() // implicit conversion to a raw R object
381 {
382 return df; // the internal Rcpp::DataFrame, converted to SEXP
383 }
384
385 operator SEXP() const // const overload of the implicit conversion to a raw R object
386 {
387 return df; // the internal Rcpp::DataFrame, converted to SEXP
388 }
389
390 /**
391 Method to get the number of columns
392 \return number of columns, as reported by df.size()
393 */
394 Int_t GetNcols() { return df.size(); }
395 /**
396 Method to get the number of rows
397 \return number of rows, as reported by df.nrows()
398 */
399 Int_t GetNrows() { return df.nrows(); }
400 /**
401 Method to get the labels of the dataframe
402 \return column names
403 */
405 {
406 Rcpp::CharacterVector names = df.attr("names");
407 TVectorString rnames(GetNcols());
408 for (Int_t i = 0; i < GetNcols(); i++) rnames[i] = names[i];
409 return rnames;
410 }
411
412 /**
413 Method to get the dataframe as a matrix: calls R's as.matrix on the internal dataframe and converts the result to TMatrixT<T>
414 \note only works on numerical dataframes; if some column is a string or another non-numeric type it will fail
415 \return TMatrixT with the given template data type
416 */
417 template<class T> TMatrixT<T> AsMatrix()
418 {
419 TRFunctionImport asMatrix("as.matrix"); // imports the R function named "as.matrix"
420 return Rcpp::as<TMatrixT<T> >(asMatrix(df));
421 }
422
423 /**
424 Method to print the whole dataframe, or a single column given its label, using R's print function
425 \param label name of the column to print; a null or empty label prints the whole dataframe
426 */
427 void Print(const Char_t *label = "")
428 {
429 TRFunctionImport print("print"); // imports the R function named "print"
430 if (!label || !label[0]) {
431 // label is null or "": a null pointer must not reach df[label]
432 print(df);
433 } else {
434 print(df[label]);
435 }
436 }
437 ClassDef(TRDataFrame, 0) //
438 };
439 }
440}
441
442
443
444#endif
void Binding()
Definition: Binding.C:21
std::vector< TString > TVectorString
Definition: RExports.h:51
#define f(i)
Definition: RSha256.hxx:104
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
int Int_t
Definition: RtypesCore.h:41
char Char_t
Definition: RtypesCore.h:29
bool Bool_t
Definition: RtypesCore.h:59
#define ClassDef(name, id)
Definition: Rtypes.h:326
char name[80]
Definition: TGX11.cxx:109
void DataFrame()
Definition: DataFrame.C:9
Binding operator=(T var)
template method for operator assignation
Definition: TRDataFrame.h:201
Binding & operator>>(T &var)
Template method for operator >> that lets to use dataframes like streams example: df["v"]>>vector;.
Definition: TRDataFrame.h:275
Binding operator=(Binding obj)
method for operator assignation of Binding class
Definition: TRDataFrame.h:238
Binding(Rcpp::DataFrame &_df, TString name)
Construct a Binding nestead class for facilities with operators.
Definition: TRDataFrame.h:191
Binding & operator<<(T var)
Template method for operator << that lets to use dataframes like streams example: df["v"]<<vector;.
Definition: TRDataFrame.h:291
Binding(const Binding &obj)
Copy constructor for Binding nestead class.
Definition: TRDataFrame.h:196
This is a class to create DataFrames from ROOT to R.
Definition: TRDataFrame.h:176
Binding operator[](const TString &name)
Definition: TRDataFrame.cxx:26
Int_t GetNcols()
Method to get the number of colunms.
Definition: TRDataFrame.h:394
TMatrixT< T > AsMatrix()
Method to get dataframe as matrix.
Definition: TRDataFrame.h:417
void Print(const Char_t *label="")
Method to print the dataframe in stdout or a column given the label.
Definition: TRDataFrame.h:427
TRDataFrame(SEXP obj)
TDataFrame constructor.
Definition: TRDataFrame.h:343
TRDataFrame()
Default TDataFrame constructor.
Definition: TRDataFrame.cxx:14
TRDataFrame & operator=(TRDataFrame obj)
Definition: TRDataFrame.h:368
Rcpp::DataFrame df
Definition: TRDataFrame.h:180
Int_t GetNrows()
Method to get the number of rows.
Definition: TRDataFrame.h:399
TVectorString GetColNames()
Method to get labels of dataframe.
Definition: TRDataFrame.h:404
TRDataFrame & operator=(SEXP obj)
Definition: TRDataFrame.h:374
TRDataFrame(const Rcpp::DataFrame &_df)
TDataFrame constructor for Rcpp::DataFrame.
Definition: TRDataFrame.h:356
TRDataFrame & operator=(TRDataFrame &obj)
Definition: TRDataFrame.h:362
This is a class to pass functions from ROOT to R.
ROOT R was implemented using the R Project library and the modules Rcpp and RInside
Definition: TRInterface.h:136
TMatrixT.
Definition: TMatrixT.h:39
Mother of all ROOT objects.
Definition: TObject.h:37
Basic string class.
Definition: TString.h:131
const char * Data() const
Definition: TString.h:364
double T(double x)
Definition: ChebyshevPol.h:34
const Rcpp::internal::NamedPlaceHolder & Label
VSD Structures.
Definition: StringConv.hxx:21