Configure a Dask connection and visualize the filling of a 1D and a 2D histogram in a distributed computation.
This tutorial showcases the process of setting up real-time data representation for distributed computations. By calling the LiveVisualize function, you can observe the canvas updating with the intermediate results of the histograms as the distributed computation progresses.
import ROOT
from dask.distributed import Client, LocalCluster
# Short module-level aliases for the distributed RDataFrame entry points used
# by the script below: the live-visualization helper and the Dask-backed
# RDataFrame constructor.
LiveVisualize = ROOT.RDF.Distributed.LiveVisualize
RDataFrame = ROOT.RDF.Distributed.Dask.RDataFrame
def create_connection():
    """Start a local Dask cluster and return a client connected to it.

    The cluster runs on the local machine with four worker processes, one
    thread per worker and a memory limit of 2 GiB per worker.

    Returns:
        A ``dask.distributed.Client`` attached to the newly created
        ``LocalCluster``.
    """
    return Client(
        LocalCluster(
            n_workers=4,
            threads_per_worker=1,
            processes=True,
            memory_limit="2GiB",
        )
    )
def fit_gaus(plot):
    """Fit ``plot`` with ROOT's predefined ``"gaus"`` function.

    Used below as the per-histogram callback handed to ``LiveVisualize``.

    Args:
        plot: Object exposing a ROOT-style ``Fit(function_name, options)``
            method (a histogram in this tutorial).
    """
    function_name = "gaus"
    # "Q" is the quiet option of ROOT's Fit, keeping fit printout to a minimum.
    fit_options = "Q"
    plot.Fit(function_name, fit_options)
if __name__ == "__main__":
    # Dask client that the distributed dataframe uses as its executor.
    connection = create_connection()

    # Number of entries to process; also used to scale the Gaussian means.
    num_entries = 100_000_000

    # Distributed dataframe over `num_entries` entries, split into 30
    # partitions that are processed through the Dask connection.
    df = RDataFrame(num_entries, executor=connection, npartitions=30)

    # Both columns are Gaussian samples whose mean grows with the entry number
    # (10 * rdfentry_ / num_entries); they differ only in width (2 vs 3).
    x_expression = f"gRandom->Gaus(10*rdfentry_/{num_entries}, 2)"
    y_expression = f"gRandom->Gaus(10*rdfentry_/{num_entries}, 3)"
    df_with_columns = df.Define("x", x_expression).Define("y", y_expression)

    # Histogram models: (name, title, bins and axis ranges).
    model_1d = ("normal_1d", "1D Histogram of a Normal Distribution", 100, -10, 20)
    model_2d = ("normal_2d", "2D Histogram of a Normal Distribution", 100, -15, 25, 100, -15, 25)

    h_normal_1d = df_with_columns.Histo1D(model_1d, "x")
    h_normal_2d = df_with_columns.Histo2D(model_2d, "x", "y")

    # Register both histograms for live visualization. Each histogram is paired
    # with a callback: the 1D histogram gets the Gaussian fit, the 2D one none.
    live_plots = {h_normal_1d: fit_gaus, h_normal_2d: None}
    LiveVisualize(live_plots)

    # Draw the final histograms in the two pads of a single canvas.
    # NOTE(review): accessing the results here is presumably what starts the
    # distributed computation -- confirm against the RDataFrame documentation.
    canvas = ROOT.TCanvas("distrdf003", "distrdf003", 1600, 400)
    canvas.Divide(2, 1)

    canvas.cd(1)
    h_normal_1d.Draw()

    canvas.cd(2)
    h_normal_2d.Draw()

    canvas.Update()
- Date
- August 2023
- Author
- Silia Taider
Definition in file distrdf003_live_visualization.py.