Configure a Dask connection and visualize the filling of a 1D and a 2D histogram during a distributed computation.
This tutorial showcases the process of setting up real-time data representation for distributed computations. By calling the LiveVisualize function, you can observe the canvas updating with the intermediate results of the histograms as the distributed computation progresses.
from dask.distributed import LocalCluster, Client
import ROOT
# Shorthand for the live-visualization function of distributed RDataFrame.
LiveVisualize = ROOT.RDF.Experimental.Distributed.LiveVisualize
# Shorthand for the Dask-backed distributed RDataFrame, so the script can
# simply call RDataFrame(...).
RDataFrame = ROOT.RDF.Experimental.Distributed.Dask.RDataFrame
def create_connection():
    """Start a local Dask cluster and return a client connected to it.

    The cluster is created with 4 single-threaded worker processes, each
    limited to 2 GiB of memory.

    Returns:
        A ``dask.distributed.Client`` attached to the new ``LocalCluster``.
    """
    return Client(
        LocalCluster(
            n_workers=4,
            threads_per_worker=1,
            processes=True,
            memory_limit="2GiB",
        )
    )
def fit_gaus(plot):
    """Fit the predefined "gaus" function to *plot*.

    Args:
        plot: Any object exposing a ``Fit(name)`` method (in this script,
            the 1D histogram handed over by ``LiveVisualize``).

    Returns:
        None. The result of ``Fit`` is discarded.
    """
    plot.Fit("gaus")
if __name__ == "__main__":
    # Set up the connection to the Dask cluster that runs the computation.
    connection = create_connection()

    # Create an empty-source RDataFrame with `num_entries` entries that uses
    # Dask as the backend, with the work split into 30 partitions.
    num_entries = 100000000
    d = RDataFrame(num_entries, daskclient=connection, npartitions=30)

    # Define two Gaussian-distributed columns whose first Gaus argument
    # depends on the entry index (rdfentry_).
    # The chained calls are wrapped in parentheses instead of using trailing
    # backslashes: a dangling "\" after the last call would splice the next
    # statement onto this expression.
    dd = (
        d.Define("x", f"gRandom->Gaus(10*rdfentry_/{num_entries}, 2)")
        .Define("y", f"gRandom->Gaus(10*rdfentry_/{num_entries}, 3)")
    )

    # Book the histograms.
    h_normal_1d = dd.Histo1D(
        ("normal_1d", "1D Histogram of a Normal Distribution", 100, -10, 20),
        "x",
    )
    h_normal_2d = dd.Histo2D(
        (
            "normal_2d",
            "2D Histogram of a Normal Distribution",
            100, -15, 25,
            100, -15, 25,
        ),
        "x",
        "y",
    )

    # Register both histograms for live visualization. The dict maps each
    # histogram to an optional callback: `fit_gaus` for the 1D histogram,
    # and None (no callback) for the 2D histogram.
    LiveVisualize({h_normal_1d: fit_gaus, h_normal_2d: None})

    # Plot the final histograms side by side on one canvas.
    c = ROOT.TCanvas("distrdf003", "distrdf003", 1600, 400)
    c.Divide(2, 1)
    c.cd(1)
    h_normal_1d.Draw()
    c.cd(2)
    h_normal_2d.Draw()
    c.Update()
- Date
- August 2023
- Author
- Silia Taider
Definition in file distrdf003_live_visualization.py.