Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
HeatmapAnalyzer.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * ZW, Zef Wolffs, Nikhef, zefwolffs@gmail.com
5 *
6 * Copyright (c) 2022, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
14
#include <TSystemDirectory.h>
#include <TList.h>

#include <nlohmann/json.hpp>

#include <fstream>
#include <stdexcept>
21
22namespace RooFit {
23namespace MultiProcess {
24
25namespace Detail {
26
28 nlohmann::json gradients;
29 nlohmann::json metadata;
30 std::vector<nlohmann::json> durations;
31};
32
33} // namespace Detail
34
35namespace {
36
/// Sorts task names in place by the integer embedded in each name.
///
/// The number is parsed with std::stoi starting at the first decimal digit of
/// the name, so a name ending in "10" is ordered after one ending in "2".
/// Names that carry the same number are ordered lexicographically.
///
/// \param[in,out] task_names Names to reorder; each one must contain a digit.
/// \throws std::out_of_range if a name contains no digit, or if its number
///         does not fit in an int.
void sortTaskNames(std::vector<std::string> &task_names)
{
   std::vector<std::pair<int, std::string>> keyed_names;
   keyed_names.reserve(task_names.size());

   for (auto const &name : task_names) {
      // find_first_of yields npos for a name without digits, which makes substr throw
      std::size_t const first_digit = name.find_first_of("0123456789");
      keyed_names.emplace_back(std::stoi(name.substr(first_digit)), name);
   }

   // pairs compare by number first and by name second
   std::sort(keyed_names.begin(), keyed_names.end());

   for (std::size_t i = 0; i < task_names.size(); ++i) {
      task_names[i] = keyed_names[i].second;
   }
}
53
54std::string findTaskForDuration(nlohmann::json durations, int start_t, int end_t)
55{
56 for (auto &&el : durations.items()) {
57 if (el.key().find("eval_partition") != std::string::npos)
58 continue;
59
60 for (size_t idx = 0; idx < durations[el.key()].size(); idx += 2) {
61 if (durations[el.key()][idx] <= start_t && durations[el.key()][idx + 1] >= end_t) {
62 return el.key();
63 }
64 }
65 }
66 return "";
67}
68
69} // namespace
70
71/** \class HeatmapAnalyzer
72 *
73 * \brief Reads and processes logfiles produced by RooFit::MultiProcess::ProcessTimer
74 *
75 * RooFit::MultiProcess::ProcessTimer records timings of multiple processes simultaneously
76 * and allows for these timings to be written out in json format, one for each process.
77 * This class, the HeatmapAnalyzer, can read these json files and produce a heatmap from
78 * them with partial derivatives on the y-axis, likelihood evaluations on the x-axis, and
79 * time expenditures on the z-axis. This class also contains some convenience functions
80 * for inspecting these log files.
81 *
82 * Note that this class requires the logfiles to contain three specific keys in the json:
83 * - `master:gradient` containing an array of gradient timestamps
84 * - `*eval_task*<task_number>` containing an array of task evaluation timestamps.
85 * - `*eval_partition*` containing an array of partition evaluation timestamps
86 */
87
88////////////////////////////////////////////////////////////////////////////////
89/// HeatmapAnalyzer Constructor. This method reads the input files in the folder
90/// specified by the user and creates internal attributes used by the other
91/// methods in this class.
92/// \param[in] logs_dir Directory where log files are stored in the format
93/// outputted by RooFit::MultiProcess::ProcessTimer.
94/// There can be other files in this directory as well.
95HeatmapAnalyzer::HeatmapAnalyzer(std::string const &logs_dir)
96 : jsonData_{std::make_unique<Detail::HeatmapAnalyzerJsonData>()}
97{
98 TSystemDirectory dir(logs_dir.c_str(), logs_dir.c_str());
99 std::unique_ptr<TList> durationFiles{dir.GetListOfFiles()};
100
101 for (TObject *file : *durationFiles) {
102 if (std::string(file->GetName()).find("p_") == std::string::npos)
103 continue;
104
105 std::ifstream f(logs_dir + "/" + std::string(file->GetName()));
106
107 if (std::string(file->GetName()).find("999") != std::string::npos) {
108 jsonData_->gradients = nlohmann::json::parse(f);
109 } else {
110 jsonData_->durations.push_back(nlohmann::json::parse(f));
111 }
112 }
113
114 for (nlohmann::json &durations_json : jsonData_->durations) {
115 for (auto &&el : durations_json.items()) {
116 if (el.key().find("eval_task") != std::string::npos &&
117 std::find(tasks_names_.begin(), tasks_names_.end(), el.key()) == tasks_names_.end()) {
118 tasks_names_.push_back(el.key());
119 } else if (el.key().find("eval_partition") != std::string::npos &&
120 std::find(eval_partitions_names_.begin(), eval_partitions_names_.end(), el.key()) ==
122 eval_partitions_names_.push_back(el.key());
123 } else if (el.key().find("metadata") != std::string::npos) {
124 jsonData_->metadata = durations_json[el.key()];
125 }
126 }
127 }
128
129 for (nlohmann::json &durations_json : jsonData_->durations) {
130 durations_json.erase("metadata");
131 }
132
133 sortTaskNames(tasks_names_);
134}
135
137
138////////////////////////////////////////////////////////////////////////////////
139/// This method is the main functionality in this class. It does the heavy
140/// lifting of matching duration timestamps to tasks and partition evaluations.
141/// \param[in] analyzed_gradient Gradient to analyze. For example, setting to 1
142/// analyzes the first gradient (ordered by time)
143/// in the logs.
144std::unique_ptr<TH2I> HeatmapAnalyzer::analyze(int analyzed_gradient)
145{
146 int gradient_start_t = jsonData_->gradients["master:gradient"][analyzed_gradient * 2 - 2];
147 int gradient_end_t = jsonData_->gradients["master:gradient"][analyzed_gradient * 2 - 1];
148
149 auto total_matrix =
150 std::make_unique<TH2I>("heatmap", "", eval_partitions_names_.size(), 0, 1, tasks_names_.size(), 0, 1);
151
152 // loop over all logfiles stored in durations_
153 for (nlohmann::json &durations_json : jsonData_->durations) {
154 // partial heatmap is the heatmap that will be filled in for the current durations logfile
155 auto partial_matrix =
156 std::make_unique<TH2I>("partial_heatmap", "", eval_partitions_names_.size(), 0, 1, tasks_names_.size(), 0, 1);
157
158 // remove unnecessary components (those that are out of range)
159 for (auto &&el : durations_json.items()) {
160 auto beg_interval =
161 std::upper_bound(durations_json[el.key()].begin(), durations_json[el.key()].end(), gradient_start_t);
162 auto end_interval =
163 std::upper_bound(durations_json[el.key()].begin(), durations_json[el.key()].end(), gradient_end_t);
164 durations_json[el.key()].erase(end_interval, durations_json[el.key()].end());
165 durations_json[el.key()].erase(durations_json[el.key()].begin(), beg_interval);
166 }
167
168 // loops over all evaluated partitions in logfile
169 for (std::string &eval_partition_name : eval_partitions_names_) {
170
171 // for this partition, loops over all durations, i.e. start and end times for partition evaluations, and for
172 // each tries to find the corresponding task
173 for (size_t idx = 0; idx < durations_json[eval_partition_name].size(); idx += 2) {
174 if (durations_json[eval_partition_name][idx + 1] > gradient_end_t ||
175 durations_json[eval_partition_name][idx] < gradient_start_t)
176 continue;
177 std::string task_name = findTaskForDuration(durations_json, durations_json[eval_partition_name][idx],
178 durations_json[eval_partition_name][idx + 1]);
179
180 if (task_name.empty())
181 continue;
182
183 // add found combination of task, partition evaluation, and duration to partial matrix
184 int tasks_idx = find(tasks_names_.begin(), tasks_names_.end(), task_name) - tasks_names_.begin() + 1;
185 int eval_partitions_idx =
186 find(eval_partitions_names_.begin(), eval_partitions_names_.end(), eval_partition_name) -
187 eval_partitions_names_.begin() + 1;
188 partial_matrix->SetBinContent(eval_partitions_idx, tasks_idx,
189 durations_json[eval_partition_name][idx + 1].get<int>() -
190 durations_json[eval_partition_name][idx].get<int>());
191 }
192 }
193 // add all partial matrices to form one matrix with entire gradient evaluation information
194 total_matrix->Add(partial_matrix.get());
195 }
196
197 // do not need the legend in case heatmap is plotted
198 total_matrix->SetStats(false);
199
200 // set the axes labels on the heatmap matrix
201 TAxis *y = total_matrix->GetYaxis();
202 TAxis *x = total_matrix->GetXaxis();
203 for (std::size_t i = 0; i != tasks_names_.size(); ++i) {
204 y->SetBinLabel(i + 1, jsonData_->metadata[0][i].get<std::string>().c_str());
205 y->ChangeLabel(i + 1, 30, 0.01, -1, -1, -1, "");
206 }
207 for (std::size_t i = 0; i != eval_partitions_names_.size(); ++i) {
208 x->SetBinLabel(i + 1, eval_partitions_names_[i].c_str());
209 x->ChangeLabel(i + 1, 30, -1, -1, -1, -1, "");
210 }
211 x->LabelsOption("v");
212
213 return total_matrix;
214}
215
216std::vector<std::string> const HeatmapAnalyzer::getTaskNames()
217{
218 return tasks_names_;
219}
220
221std::vector<std::string> const HeatmapAnalyzer::getPartitionNames()
222{
224}
225
226std::vector<std::string> const HeatmapAnalyzer::getMetadata()
227{
228 std::vector<std::string> out;
229 for (auto const &item : jsonData_->metadata[0]) {
230 out.emplace_back(item.get<std::string>());
231 }
232 return out;
233}
234
235} // namespace MultiProcess
236} // namespace RooFit
#define f(i)
Definition RSha256.hxx:104
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
std::unique_ptr< TH2I > analyze(int analyzed_gradient)
This method is the main functionality in this class.
std::vector< std::string > const getMetadata()
std::vector< std::string > const getPartitionNames()
std::vector< std::string > const getTaskNames()
HeatmapAnalyzer(std::string const &logs_dir)
HeatmapAnalyzer Constructor.
std::vector< std::string > tasks_names_
std::unique_ptr< Detail::HeatmapAnalyzerJsonData > jsonData_
std::vector< std::string > eval_partitions_names_
Class to manage histogram axis.
Definition TAxis.h:31
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
Definition TAxis.cxx:887
Mother of all ROOT objects.
Definition TObject.h:41
Describes an Operating System directory for the browser.
virtual TList * GetListOfFiles() const
Returns a TList of TSystemFile objects representing the contents of the directory.
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition CodegenImpl.h:64