Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
HeatmapAnalyzer.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * ZW, Zef Wolffs, Nikhef, zefwolffs@gmail.com
5 *
6 * Copyright (c) 2022, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
14
#include <TSystemDirectory.h>
#include <TList.h>

#include <nlohmann/json.hpp>

#include <algorithm>
#include <fstream>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
21
22namespace RooFit {
23namespace MultiProcess {
24
25namespace Detail {
26
28 nlohmann::json gradients;
29 nlohmann::json metadata;
30 std::vector<nlohmann::json> durations;
31};
32
33} // namespace Detail
34
35namespace {
36
/// Sorts task names in place by the first integer embedded in each name, so
/// that e.g. `eval_task2` is ordered before `eval_task10`. Names that carry the
/// same number are ordered lexicographically.
/// \param[in,out] task_names Names to sort; every name must contain a number.
/// \throws std::out_of_range if a name contains no digit at all (raised by
///         `substr`), or if the embedded number does not fit in an `int`.
void sortTaskNames(std::vector<std::string> &task_names)
{
   char const *digits = "0123456789";

   // Parse everything before touching task_names, so that a malformed name
   // leaves the input vector unmodified.
   std::vector<std::pair<int, std::string>> keyed_names;
   keyed_names.reserve(task_names.size());
   for (auto const &name : task_names) {
      std::size_t const n = name.find_first_of(digits);
      keyed_names.emplace_back(std::stoi(name.substr(n)), name);
   }

   // pairs compare on the number first and on the name second
   std::sort(keyed_names.begin(), keyed_names.end());

   for (std::size_t i = 0; i < task_names.size(); ++i) {
      task_names[i] = std::move(keyed_names[i].second);
   }
}
53
54std::string findTaskForDuration(nlohmann::json durations, int start_t, int end_t)
55{
56 for (auto &&el : durations.items()) {
57 if (el.key().find("eval_partition") != std::string::npos)
58 continue;
59
60 for (size_t idx = 0; idx < durations[el.key()].size(); idx += 2) {
61 if (durations[el.key()][idx] <= start_t && durations[el.key()][idx + 1] >= end_t) {
62 return el.key();
63 }
64 }
65 }
66 return "";
67}
68
69} // namespace
70
71/** \class HeatmapAnalyzer
72 *
73 * \brief Reads and processes logfiles produced by RooFit::MultiProcess::ProcessTimer
74 *
75 * RooFit::MultiProcess::ProcessTimer records timings of multiple processes simultaneously
76 * and allows for these timings to be written out in json format, one for each process.
77 * This class, the HeatmapAnalyzer, can read these json files and produce a heatmap from
78 * them with partial derivatives on the y-axis, likelihood evaluations on the x-axis, and
79 * time expenditures on the z-axis. This class also contains some convenience functions
80 * for inspecting these log files.
81 *
82 * Note that this class requires the logfiles to contain three specific keys in the json:
83 * - `master:gradient` containing an array of gradient timestamps
84 * - `*eval_task*<task_number>` containing an array of task evaluation timestamps.
85 * - `*eval_partition*` containing an array of partition evaluation timestamps
86 */
87
88////////////////////////////////////////////////////////////////////////////////
89/// HeatmapAnalyzer Constructor. This method reads the input files in the folder
90/// specified by the user and creates internal attributes used by the other
91/// methods in this class.
92/// \param[in] logs_dir Directory where log files are stored in the format
93/// outputted by RooFit::MultiProcess::ProcessTimer.
94/// There can be other files in this directory as well.
95HeatmapAnalyzer::HeatmapAnalyzer(std::string const &logs_dir)
96 : jsonData_{std::make_unique<Detail::HeatmapAnalyzerJsonData>()}
97{
98 TSystemDirectory dir(logs_dir.c_str(), logs_dir.c_str());
99 std::unique_ptr<TList> durationFiles{dir.GetListOfFiles()};
100
101 for (TObject *file : *durationFiles) {
102 if (std::string(file->GetName()).find("p_") == std::string::npos)
103 continue;
104
105 std::ifstream f(logs_dir + "/" + std::string(file->GetName()));
106
107 if (std::string(file->GetName()).find("999") != std::string::npos)
108 jsonData_->gradients = nlohmann::json::parse(f);
109 else
110 jsonData_->durations.push_back(nlohmann::json::parse(f));
111 }
112
113 for (nlohmann::json &durations_json : jsonData_->durations) {
114 for (auto &&el : durations_json.items()) {
115 if (el.key().find("eval_task") != std::string::npos &&
116 std::find(tasks_names_.begin(), tasks_names_.end(), el.key()) == tasks_names_.end())
117 tasks_names_.push_back(el.key());
118 else if (el.key().find("eval_partition") != std::string::npos &&
119 std::find(eval_partitions_names_.begin(), eval_partitions_names_.end(), el.key()) ==
121 eval_partitions_names_.push_back(el.key());
122 else if (el.key().find("metadata") != std::string::npos) {
123 jsonData_->metadata = durations_json[el.key()];
124 }
125 }
126 }
127
128 for (nlohmann::json &durations_json : jsonData_->durations) {
129 durations_json.erase("metadata");
130 }
131
132 sortTaskNames(tasks_names_);
133}
134
136
137////////////////////////////////////////////////////////////////////////////////
138/// This method is the main functionality in this class. It does the heavy
139/// lifting of matching duration timestamps to tasks and partition evaluations.
140/// \param[in] analyzed_gradient Gradient to analyze. For example, setting to 1
141/// analyzes the first gradient (ordered by time)
142/// in the logs.
143std::unique_ptr<TH2I> HeatmapAnalyzer::analyze(int analyzed_gradient)
144{
145 int gradient_start_t = jsonData_->gradients["master:gradient"][analyzed_gradient * 2 - 2];
146 int gradient_end_t = jsonData_->gradients["master:gradient"][analyzed_gradient * 2 - 1];
147
148 auto total_matrix =
149 std::make_unique<TH2I>("heatmap", "", eval_partitions_names_.size(), 0, 1, tasks_names_.size(), 0, 1);
150
151 // loop over all logfiles stored in durations_
152 for (nlohmann::json &durations_json : jsonData_->durations) {
153 // partial heatmap is the heatmap that will be filled in for the current durations logfile
154 auto partial_matrix =
155 std::make_unique<TH2I>("partial_heatmap", "", eval_partitions_names_.size(), 0, 1, tasks_names_.size(), 0, 1);
156
157 // remove unnecessary components (those that are out of range)
158 for (auto &&el : durations_json.items()) {
159 auto beg_interval =
160 std::upper_bound(durations_json[el.key()].begin(), durations_json[el.key()].end(), gradient_start_t);
161 auto end_interval =
162 std::upper_bound(durations_json[el.key()].begin(), durations_json[el.key()].end(), gradient_end_t);
163 durations_json[el.key()].erase(end_interval, durations_json[el.key()].end());
164 durations_json[el.key()].erase(durations_json[el.key()].begin(), beg_interval);
165 }
166
167 // loops over all evaluated partitions in logfile
168 for (std::string &eval_partition_name : eval_partitions_names_) {
169
170 // for this partition, loops over all durations, i.e. start and end times for partition evaluations, and for
171 // each tries to find the corresponding task
172 for (size_t idx = 0; idx < durations_json[eval_partition_name].size(); idx += 2) {
173 if (durations_json[eval_partition_name][idx + 1] > gradient_end_t ||
174 durations_json[eval_partition_name][idx] < gradient_start_t)
175 continue;
176 std::string task_name = findTaskForDuration(durations_json, durations_json[eval_partition_name][idx],
177 durations_json[eval_partition_name][idx + 1]);
178
179 if (task_name.empty())
180 continue;
181
182 // add found combination of task, partition evaluation, and duration to partial matrix
183 int tasks_idx = find(tasks_names_.begin(), tasks_names_.end(), task_name) - tasks_names_.begin() + 1;
184 int eval_partitions_idx =
185 find(eval_partitions_names_.begin(), eval_partitions_names_.end(), eval_partition_name) -
186 eval_partitions_names_.begin() + 1;
187 partial_matrix->SetBinContent(eval_partitions_idx, tasks_idx,
188 durations_json[eval_partition_name][idx + 1].get<int>() -
189 durations_json[eval_partition_name][idx].get<int>());
190 }
191 }
192 // add all partial matrices to form one matrix with entire gradient evaluation information
193 total_matrix->Add(partial_matrix.get());
194 }
195
196 // do not need the legend in case heatmap is plotted
197 total_matrix->SetStats(false);
198
199 // set the axes labels on the heatmap matrix
200 TAxis *y = total_matrix->GetYaxis();
201 TAxis *x = total_matrix->GetXaxis();
202 for (std::size_t i = 0; i != tasks_names_.size(); ++i) {
203 y->SetBinLabel(i + 1, jsonData_->metadata[0][i].get<std::string>().c_str());
204 y->ChangeLabel(i + 1, 30, 0.01, -1, -1, -1, "");
205 }
206 for (std::size_t i = 0; i != eval_partitions_names_.size(); ++i) {
207 x->SetBinLabel(i + 1, eval_partitions_names_[i].c_str());
208 x->ChangeLabel(i + 1, 30, -1, -1, -1, -1, "");
209 }
210 x->LabelsOption("v");
211
212 return total_matrix;
213}
214
215std::vector<std::string> const HeatmapAnalyzer::getTaskNames()
216{
217 return tasks_names_;
218}
219
220std::vector<std::string> const HeatmapAnalyzer::getPartitionNames()
221{
223}
224
225std::vector<std::string> const HeatmapAnalyzer::getMetadata()
226{
227 std::vector<std::string> out;
228 for (auto const &item : jsonData_->metadata[0]) {
229 out.emplace_back(item.get<std::string>());
230 }
231 return out;
232}
233
234} // namespace MultiProcess
235} // namespace RooFit
#define f(i)
Definition RSha256.hxx:104
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
std::unique_ptr< TH2I > analyze(int analyzed_gradient)
This method is the main functionality in this class.
std::vector< std::string > const getMetadata()
std::vector< std::string > const getPartitionNames()
std::vector< std::string > const getTaskNames()
HeatmapAnalyzer(std::string const &logs_dir)
HeatmapAnalyzer Constructor.
std::vector< std::string > tasks_names_
std::unique_ptr< Detail::HeatmapAnalyzerJsonData > jsonData_
std::vector< std::string > eval_partitions_names_
Class to manage histogram axis.
Definition TAxis.h:31
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
Definition TAxis.cxx:886
Mother of all ROOT objects.
Definition TObject.h:41
Describes an Operating System directory for the browser.
virtual TList * GetListOfFiles() const
Returns a TList of TSystemFile objects representing the contents of the directory.
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition JSONIO.h:26
Definition file.py:1