Logo ROOT   6.16/01
Reference Guide
TRDataFrame.h
Go to the documentation of this file.
1// @(#)root/r:$Id$
2// Author: Omar Zapata 30/05/2015
3
4
5/*************************************************************************
6 * Copyright (C) 2013-2015, Omar Andres Zapata Mesa *
7 * All rights reserved. *
8 * *
9 * For the licensing terms see $ROOTSYS/LICENSE. *
10 * For the list of contributors see $ROOTSYS/README/CREDITS. *
11 *************************************************************************/
12#ifndef ROOT_R_TRDataFrame
13#define ROOT_R_TRDataFrame
14
15#include <RExports.h>
16
17#include <TRObject.h>
18
19#include <TRFunctionImport.h>
20
21
22namespace ROOT {
23 namespace R {
24
25 /**
26 \class TRDataFrame
27
28 This is a class to create DataFrames from ROOT to R
29 <center><h2>TRDataFrame class</h2></center>
30
31 DataFrame is a very important datatype in R, and ROOTR provides a class to manipulate<br>
32 dataframes called TRDataFrame, with many useful overloaded operators to work with TRDataFrame objects<br>
33 in a way similar to the R environment, but from C++ in ROOT.<br>
34 Example:<br>
35 <br>
36 Let's create the data needed to play with the dataframe features<br>
37
38 <h2>Creating variables</h2><br>
39 \code{.cpp}
40 TVectorD v1(3);
41 std::vector<Double_t> v2(3);
42 std::array<Int_t,3> v3{ {1,2,3} };
43 std::list<std::string> names;
44 \endcode
45
46 <h2> Assigning values </h2><br>
47 \code{.cpp}
48 v1[0]=1;
49 v1[1]=2;
50 v1[2]=3;
51
52 v2[0]=0.101;
53 v2[1]=0.202;
54 v2[2]=0.303;
55
56 names.push_back("v1");
57 names.push_back("v2");
58 names.push_back("v3");
59
60 ROOT::R::TRInterface &r=ROOT::R::TRInterface::Instance();
61 \endcode
62
63 In R, a dataframe associates a label with every column;
64 in ROOTR you can attach the same kind of label using ROOT::R::Label to create a TRDataFrame whose data
65 has an associated label.
66 <h2> Creating dataframe object with its labels</h2> <br>
67 \code{.cpp}
68 using namespace ROOT::R;
69 TRDataFrame df1(Label["var1"]=v1,Label["var2"]=v2,Label["var3"]=v3,Label["strings"]=names);
70 \endcode
71
72 <h2>Passing dataframe to R's environment</h2><br>
73 \code{.cpp}
74 r["df1"]<<df1;
75 r<<"print(df1)";
76 \endcode
77 Output
78 \code
79 var1 var2 var3 strings
80 1 1 0.101 1 v1
81 2 2 0.202 2 v2
82 3 3 0.303 3 v3
83 \endcode
84
85 Manipulating data between dataframes
86 <h2>Adding columns to dataframe</h2><br>
87 \code{.cpp}
88 TVectorD v4(3);
89 //filling the vector from R's environment
90 r["c(-1,-2,-3)"]>>v4;
91 //adding new column to df1 with name var4
92 df1["var4"]=v4;
93 //updating df1 in R's environment
94 r["df1"]<<df1;
95 //printing df1
96 r<<"print(df1)";
97 \endcode
98
99 Output
100 var1 var2 var3 strings var4
101 1 1 0.101 1 v1 -1
102 2 2 0.202 2 v2 -2
103 3 3 0.303 3 v3 -3
104
105 <h2>Getting dataframe from R's environment</h2><br>
106 \code{.cpp}
107 ROOT::R::TRDataFrame df2;
108
109 r<<"df2<-data.frame(v1=c(0.1,0.2,0.3),v2=c(3,2,1))";
110 r["df2"]>>df2;
111
112 TVectorD v(3);
113 df2["v1"]>>v;
114 v.Print();
115
116 df2["v2"]>>v;
117 v.Print();
118 \endcode
119
120 Output
121 \code
122 Vector (3) is as follows
123
124 | 1 |
125 ------------------
126 0 |0.1
127 1 |0.2
128 2 |0.3
129
130 Vector (3) is as follows
131
132 | 1 |
133 ------------------
134 0 |3
135 1 |2
136 2 |1
137 \endcode
138
139 <h2>Working with columns between dataframes</h2><br>
140 \code{.cpp}
141 df2["v3"]<<df1["strings"];
142
143 //updating df2 in R's environment
144 r["df2"]<<df2;
145 r<<"print(df2)";
146 \endcode
147 Output
148 \code
149 v1 v2 v3
150 1 0.1 3 v1
151 2 0.2 2 v2
152 3 0.3 1 v3
153 \endcode
154
155 <h2>Working with columns between dataframes</h2><br>
156 \code{.cpp}
157 //passing values from column v3 of df2 to var1 of df1
158 df2["v3"]>>df1["var1"];
159 //updating df1 in R's environment
160 r["df1"]<<df1;
161 r<<"print(df1)";
162 \endcode
163 Output
164 \code
165 var1 var2 var3 strings var4
166 1 v1 0.101 1 v1 -1
167 2 v2 0.202 2 v2 -2
168 3 v3 0.303 3 v3 -3
169 \endcode
170 <h2>Users Guide </h2>
171 <a href="http://oproject.org/tiki-index.php?page=ROOT+R+Users+Guide"> http://oproject.org/tiki-index.php?page=ROOT+R+Users+Guide</a><br>
172 <a href="https://root.cern.ch/drupal/content/how-use-r-root-root-r-interface"> https://root.cern.ch/drupal/content/how-use-r-root-root-r-interface</a>
173 @ingroup R
174 */
175
176
177 class TRDataFrame: public TObject {
178 friend class TRInterface;
179 friend SEXP Rcpp::wrap<TRDataFrame>(const TRDataFrame &f);
180 protected:
181 Rcpp::DataFrame df; //internal Rcpp::DataFrame
182 public:
183 //Proxy class that enables assignment-style operators, e.g. df["name"]>>object
184 class Binding {
185 friend class TRDataFrame;
186 public:
187 /**
188 Construct an object of the nested Binding class, which provides the operator facilities
189 \param _df Rcpp::DataFrame (internal from TRDataFrame)
190 \param name string to use in assignments
191 */
193 /**
194 Copy constructor for Binding nestead class
195 \param obj object with Rcpp::DataFame objecta and string with name
196 */
197 Binding(const Binding &obj): fName(obj.fName), fDf(obj.fDf) {}
198 /**
199 template method for operator assignation
200 \param var any R wrappable datatype
201 */
202 template <class T> Binding operator=(T var)
203 {
204 Int_t size = fDf.size(), i = 0;
205 Rcpp::CharacterVector names = fDf.attr("names");
206 Bool_t found = false;
207 while (i < size) {
208 if (names[i] == fName.Data()) {
209 found = true;
210 break;
211 }
212 i++;
213 }
214 if (found) fDf[fName.Data()] = var;
215 else {
216 if (size == 0) {
217 fDf = Rcpp::DataFrame::create(ROOT::R::Label[fName.Data()] = var);
218 } else {
219 Rcpp::List nDf(size + 1);
220 Rcpp::CharacterVector nnames(size + 1);
221 for (i = 0; i < size; i++) {
222 nDf[i] = fDf[i] ;
223 nnames[i] = names[i];
224 }
225 nDf[size] = var;
226 nnames[size] = fName.Data();
227 nDf.attr("class") = fDf.attr("class") ;
228 nDf.attr("row.names") = fDf.attr("row.names") ;
229 nDf.attr("names") = nnames ;
230 fDf = nDf;
231 }
232 }
233 return *this;
234 }
235 /**
236 method for operator assignation of Binding class
237 \param obj other Binding object
238 */
240 {
241 Int_t size = fDf.size(), i = 0;
242 Rcpp::CharacterVector names = fDf.attr("names");
243 Bool_t found = false;
244 while (i < size) {
245 if (names[i] == fName.Data()) {
246 found = true;
247 break;
248 }
249 i++;
250 }
251 if (found) fDf[fName.Data()] = obj.fDf[obj.fName.Data()];
252 else {
253 Rcpp::List nDf(size + 1);
254 Rcpp::CharacterVector nnames(size + 1);
255 for (i = 0; i < size; i++) {
256 nDf[i] = obj.fDf[i] ;
257 nnames[i] = names[i];
258 }
259 nDf[size] = obj.fDf[obj.fName.Data()];
260 nnames[size] = fName.Data();
261
262 nDf.attr("class") = obj.fDf.attr("class") ;
263 nDf.attr("row.names") = obj.fDf.attr("row.names") ;
264 nDf.attr("names") = nnames ;
265 fDf = nDf;
266 }
267
268 return *this;
269 }
270
271 /**
272 Template method for operator >> that lets to use dataframes like streams
273 example: df["v"]>>vector;
274 \param var any datatype that can be assigned from dataframe label
275 */
276 template <class T> Binding &operator >>(T &var)
277 {
278 var = Rcpp::as<T>(fDf[fName.Data()]);
279 return *this;
280 }
282 {
283 var.fDf[var.fName.Data()] = fDf[fName.Data()];
284 return var;
285 }
286
287 /**
288 Template method for operator << that lets to use dataframes like streams
289 example: df["v"]<<vector;
290 \param var any datatype that can be assigned to dataframe label
291 */
292 template <class T> Binding &operator <<(T var)
293 {
294 Int_t size = fDf.size(), i = 0;
295 Rcpp::CharacterVector names = fDf.attr("names");
296 Bool_t found = false;
297 while (i < size) {
298 if (names[i] == fName.Data()) {
299 found = true;
300 break;
301 }
302 i++;
303 }
304 if (found) fDf[fName.Data()] = var;
305 else {
306 Rcpp::List nDf(size + 1);
307 Rcpp::CharacterVector nnames(size + 1);
308 for (i = 0; i < size; i++) {
309 nDf[i] = fDf[i] ;
310 nnames[i] = names[i];
311 }
312 nDf[size] = var;
313 nnames[size] = fName.Data();
314
315 nDf.attr("class") = fDf.attr("class") ;
316 nDf.attr("row.names") = fDf.attr("row.names") ;
317 nDf.attr("names") = nnames ;
318 fDf = nDf;
319 }
320 return *this;
321 }
322 template <class T> operator T()
323 {
324 return Rcpp::as<T>(fDf[fName.Data()]);
325 }
326 template <class T> operator T() const
327 {
328 return Rcpp::as<T>(fDf[fName.Data()]);
329 }
330
331 private:
332 TString fName; //name of label
333 Rcpp::DataFrame &fDf;//internal dataframe
334 };
335
336 /**
337 Default TRDataFrame constructor
338 */
339 TRDataFrame();
340 /**
341 TDataFrame constructor
342 \param obj raw R object that can be casted to DataFrame
343 */
344 TRDataFrame(SEXP obj)
345 {
346 df = Rcpp::as<Rcpp::DataFrame>(obj);
347 }
348 /**
349 TRDataFrame copy constructor
350 \param _df other TRDataFrame
351 */
352 TRDataFrame(const TRDataFrame &_df);
353 /**
354 TDataFrame constructor for Rcpp::DataFrame
355 \param _df raw dataframe from Rcpp
356 */
357 TRDataFrame(const Rcpp::DataFrame &_df): df(_df) {};
358
359#include <TRDataFrame__ctors.h>
360
362
364 {
365 df = obj.df;
366 return *this;
367 }
368
370 {
371 df = obj.df;
372 return *this;
373 }
374
376 {
377 df = Rcpp::as<Rcpp::DataFrame>(obj);
378 return *this;
379 }
380
381 operator SEXP()
382 {
383 return df;
384 }
385
386 operator SEXP() const
387 {
388 return df;
389 }
390
391 /**
392 Method to get the number of colunms
393 \return number of cols
394 */
395 Int_t GetNcols() { return df.size(); }
396 /**
397 Method to get the number of rows
398 \return number of rows
399 */
400 Int_t GetNrows() { return df.nrows(); }
401 /**
402 Method to get labels of dataframe
403 \return column names
404 */
406 {
407 Rcpp::CharacterVector names = df.attr("names");
408 TVectorString rnames(GetNcols());
409 for (Int_t i = 0; i < GetNcols(); i++) rnames[i] = names[i];
410 return rnames;
411 }
412
413 /**
414 Method to get dataframe as matrix
415 \note only work on numerical dataframes if some column if string or other it will fail
416 \return TMatrixT with a given tamplate data type
417 */
418 template<class T> TMatrixT<T> AsMatrix()
419 {
420 TRFunctionImport asMatrix("as.matrix");
421 return Rcpp::as<TMatrixT<T> >(asMatrix(df));
422 }
423
424 /**
425 Method to print the dataframe in stdout or a column given the label
426 \param label nomber of the column to print
427 */
428 void Print(const Char_t *label = "")
429 {
430 TRFunctionImport print("print");
431 if (label && !label[0]) {
432 // label is ""
433 print(df);
434 } else {
435 print(df[label]);
436 }
437 }
438 ClassDef(TRDataFrame, 0) //
439 };
440 }
441}
442
443
444
445#endif
void Binding()
Definition: Binding.C:21
std::vector< TString > TVectorString
Definition: RExports.h:51
#define f(i)
Definition: RSha256.hxx:104
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
int Int_t
Definition: RtypesCore.h:41
char Char_t
Definition: RtypesCore.h:29
bool Bool_t
Definition: RtypesCore.h:59
#define ClassDef(name, id)
Definition: Rtypes.h:324
void DataFrame()
Definition: DataFrame.C:9
Binding operator=(T var)
template method for operator assignation
Definition: TRDataFrame.h:202
Binding & operator>>(T &var)
Template method for operator >> that lets to use dataframes like streams example: df["v"]>>vector;.
Definition: TRDataFrame.h:276
Binding operator=(Binding obj)
method for operator assignation of Binding class
Definition: TRDataFrame.h:239
Binding(Rcpp::DataFrame &_df, TString name)
Construct a Binding nestead class for facilities with operators.
Definition: TRDataFrame.h:192
Binding & operator<<(T var)
Template method for operator << that lets to use dataframes like streams example: df["v"]<<vector;.
Definition: TRDataFrame.h:292
Binding(const Binding &obj)
Copy constructor for Binding nestead class.
Definition: TRDataFrame.h:197
This is a class to create DataFrames from ROOT to R.
Definition: TRDataFrame.h:177
Binding operator[](const TString &name)
Definition: TRDataFrame.cxx:26
Int_t GetNcols()
Method to get the number of colunms.
Definition: TRDataFrame.h:395
TMatrixT< T > AsMatrix()
Method to get dataframe as matrix.
Definition: TRDataFrame.h:418
void Print(const Char_t *label="")
Method to print the dataframe in stdout or a column given the label.
Definition: TRDataFrame.h:428
TRDataFrame(SEXP obj)
TDataFrame constructor.
Definition: TRDataFrame.h:344
TRDataFrame()
Default TDataFrame constructor.
Definition: TRDataFrame.cxx:14
TRDataFrame & operator=(TRDataFrame obj)
Definition: TRDataFrame.h:369
Rcpp::DataFrame df
Definition: TRDataFrame.h:181
Int_t GetNrows()
Method to get the number of rows.
Definition: TRDataFrame.h:400
TVectorString GetColNames()
Method to get labels of dataframe.
Definition: TRDataFrame.h:405
TRDataFrame & operator=(SEXP obj)
Definition: TRDataFrame.h:375
TRDataFrame(const Rcpp::DataFrame &_df)
TDataFrame constructor for Rcpp::DataFrame.
Definition: TRDataFrame.h:357
TRDataFrame & operator=(TRDataFrame &obj)
Definition: TRDataFrame.h:363
This is a class to pass functions from ROOT to R.
ROOT R was implemented using the R Project library and the modules Rcpp and RInside
Definition: TRInterface.h:137
TMatrixT.
Definition: TMatrixT.h:39
Mother of all ROOT objects.
Definition: TObject.h:37
Basic string class.
Definition: TString.h:131
const char * Data() const
Definition: TString.h:364
double T(double x)
Definition: ChebyshevPol.h:34
Rcpp::internal::NamedPlaceHolder Label
Definition: RExports.cxx:14
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21