Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RSqliteDS.hxx
Go to the documentation of this file.
1// Author: Jakob Blomer CERN 07/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2017, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RSQLITEDS
12#define ROOT_RSQLITEDS
13
14#include "ROOT/RDataFrame.hxx"
15#include "ROOT/RDataSource.hxx"
16#include <string_view>
17
18#include <memory>
19#include <string>
20#include <vector>
21
22namespace ROOT::Internal::RDF {
24 void *fValuePtr;
25 void *GetImpl(Long64_t) final { return fValuePtr; }
26
27public:
29};
30} // namespace ROOT::Internal::RDF
31
32namespace ROOT {
33
34namespace RDF {
35
36namespace Internal {
37// Members are defined in RSqliteDS.cxx in order to not pollute this header file with sqlite3.h
38struct RSqliteDSDataSet;
39}
40
41// clang-format off
42/**
43\class ROOT::RDF::RSqliteDS
44\ingroup dataframe
45\brief RSqliteDS is an RDF data source implementation for SQL result sets from sqlite3 files.
46
47The RSqliteDS is able to feed an RDataFrame with data from a SQLite SELECT query. One can use it like
48
49 auto rdf = ROOT::RDF::FromSqlite("/path/to/file.sqlite", "select name from table");
50 auto h = rdf.Define("lName", "name.length()").Histo1D("lName");
51
52The data source has to provide column types for all the columns. Determining column types in SQLite is tricky
53as it is dynamically typed and in principle each row can have different column types. The following heuristic
54is used:
55
56 - If a table column is queried as is ("SELECT colname FROM table"), the default/declared column type is taken.
57 - For expressions ("SELECT 1+1 FROM table"), the type of the first row of the result set determines the column type.
58 That can result in a column being thought to be of type NULL even though subsequent rows actually have meaningful values.
59 The provided SELECT query can be used to avoid such ambiguities.
60*/
62private:
63 // clang-format off
64 /// All the types known to SQLite. Changes require changing fgTypeNames, too.
65 enum class ETypes {
66 kInteger, ///< Listed first; lines up with the "Long64_t" entry of fgTypeNames.
67 kReal, ///< Lines up with the "double" entry of fgTypeNames.
68 kText, ///< Lines up with the "std::string" entry of fgTypeNames.
69 kBlob, ///< Lines up with the "std::vector<unsigned char>" entry of fgTypeNames.
70 kNull ///< Listed last; lines up with the "void *" entry of fgTypeNames.
71 };
72 // clang-format on
73
74 /// Used to hold a single "cell" of the SELECT query's result table. Can be changed to std::variant once available.
75 struct Value_t {
76 explicit Value_t(ETypes type);
77
79 bool fIsActive; ///< Not all columns of the query are necessarily used by the RDF. Allows for skipping them.
81 double fReal;
82 std::string fText;
83 std::vector<unsigned char> fBlob;
84 void *fNull;
85 void *fPtr; ///< Points to one of the values; an address to this pointer is returned by GetColumnReadersImpl.
86 };
87
88 void SqliteError(int errcode);
89
90 std::unique_ptr<Internal::RSqliteDSDataSet> fDataSet;
92 std::vector<std::string> fColumnNames;
93 std::vector<ETypes> fColumnTypes;
94 /// The data source is inherently single-threaded and returns only one row at a time. This vector holds the results.
95 std::vector<Value_t> fValues;
96
97 // clang-format off
98 /// Corresponds to the types defined in ETypes.
99 static constexpr char const *fgTypeNames[] = { // one C++ type name per ETypes enumerator, in declaration order
100 "Long64_t", // ETypes::kInteger
101 "double", // ETypes::kReal
102 "std::string", // ETypes::kText
103 "std::vector<unsigned char>", // ETypes::kBlob
104 "void *" // ETypes::kNull
105 };
106 // clang-format on
107
108public:
109 RSqliteDS(const std::string &fileName, const std::string &query);
110 // Rule of five
111 RSqliteDS(const RSqliteDS &) = delete;
112 RSqliteDS &operator=(const RSqliteDS &) = delete;
113 RSqliteDS(RSqliteDS &&) = delete;
116
117 void SetNSlots(unsigned int nSlots) final;
118 const std::vector<std::string> &GetColumnNames() const final;
119 bool HasColumn(std::string_view colName) const final;
120 std::string GetTypeName(std::string_view colName) const final;
121 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final;
122 bool SetEntry(unsigned int slot, ULong64_t entry) final;
123 void Initialize() final;
124 std::string GetLabel() final;
125
126 std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
127 GetColumnReaders(unsigned int slot, std::string_view colName, const std::type_info &tid) final;
128
130 Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final;
131};
132
133RDataFrame FromSqlite(std::string_view fileName, std::string_view query);
134
135} // namespace RDF
136
137} // namespace ROOT
138
139#endif
@ kText
Definition Buttons.h:30
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
unsigned long long ULong64_t
Portable unsigned long integer 8 bytes.
Definition RtypesCore.h:84
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
Pure virtual base class for all column reader types.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
RSqliteDS is an RDF data source implementation for SQL result sets from sqlite3 files.
Definition RSqliteDS.hxx:61
RSqliteDS & operator=(RSqliteDS &&)=delete
RSqliteDS & operator=(const RSqliteDS &)=delete
RSqliteDS(RSqliteDS &&)=delete
std::vector< std::string > fColumnNames
Definition RSqliteDS.hxx:92
RSqliteDS(const RSqliteDS &)=delete
std::vector< ETypes > fColumnTypes
Definition RSqliteDS.hxx:93
ETypes
All the types known to SQLite. Changes require changing fgTypeNames, too.
Definition RSqliteDS.hxx:65
std::unique_ptr< Internal::RSqliteDSDataSet > fDataSet
Definition RSqliteDS.hxx:90
std::vector< Value_t > fValues
The data source is inherently single-threaded and returns only one row at a time. This vector holds t...
Definition RSqliteDS.hxx:95
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Namespace for new ROOT classes and functions.
The state of an open dataset in terms of the sqlite3 C library.
Used to hold a single "cell" of the SELECT query's result table. Can be changed to std::variant once ...
Definition RSqliteDS.hxx:75
void * fPtr
Points to one of the values; an address to this pointer is returned by GetColumnReadersImpl.
Definition RSqliteDS.hxx:85
std::vector< unsigned char > fBlob
Definition RSqliteDS.hxx:83
bool fIsActive
Not all columns of the query are necessarily used by the RDF. Allows for skipping them.
Definition RSqliteDS.hxx:79