Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
HeatmapAnalyzer.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * ZW, Zef Wolffs, Nikhef, zefwolffs@gmail.com
5 *
6 * Copyright (c) 2022, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
14
15#include "TSystemDirectory.h"
16#include "TList.h"
17
18#include <fstream>
19
20namespace RooFit {
21namespace MultiProcess {
22
23/** \class HeatmapAnalyzer
24 *
25 * \brief Reads and processes logfiles produced by RooFit::MultiProcess::ProcessTimer
26 *
27 * RooFit::MultiProcess::ProcessTimer records timings of multiple processes simultaneously
28 * and allows for these timings to be written out in json format, one for each process.
29 * This class, the HeatmapAnalyzer, can read these json files and produce a heatmap from
30 * them with partial derivatives on the y-axis, likelihood evaluations on the x-axis, and
31 * time expenditures on the z-axis. This class also contains some convenience functions
32 * for inspecting these log files.
33 *
34 * Note that this class requires the logfiles to contain three specific keys in the json:
35 * - `master:gradient` containing an array of gradient timestamps
36 * - `*eval_task*<task_number>` containing an array of task evaluation timestamps.
37 * - `*eval_partition*` containing an array of partition evaluation timestamps
38 */
39
40////////////////////////////////////////////////////////////////////////////////
41/// HeatmapAnalyzer Constructor. This method reads the input files in the folder
42/// specified by the user and creates internal attributes used by the other
43/// methods in this class.
44/// \param[in] logs_dir Directory where log files are stored in the format
45/// outputted by RooFit::MultiProcess::ProcessTimer.
46/// There can be other files in this directory as well.
47HeatmapAnalyzer::HeatmapAnalyzer(std::string const &logs_dir)
48{
49 TSystemDirectory dir(logs_dir.c_str(), logs_dir.c_str());
50 std::unique_ptr<TList> durationFiles{dir.GetListOfFiles()};
51
52 for (TObject *file : *durationFiles) {
53 if (std::string(file->GetName()).find("p_") == std::string::npos)
54 continue;
55
56 std::ifstream f(logs_dir + "/" + std::string(file->GetName()));
57
58 if (std::string(file->GetName()).find("999") != std::string::npos)
59 gradients_ = json::parse(f);
60 else
61 durations_.push_back(json::parse(f));
62 }
63
64 for (json &durations_json : durations_) {
65 for (auto &&el : durations_json.items()) {
66 if (el.key().find("eval_task") != std::string::npos &&
67 std::find(tasks_names_.begin(), tasks_names_.end(), el.key()) == tasks_names_.end())
68 tasks_names_.push_back(el.key());
69 else if (el.key().find("eval_partition") != std::string::npos &&
70 std::find(eval_partitions_names_.begin(), eval_partitions_names_.end(), el.key()) ==
72 eval_partitions_names_.push_back(el.key());
73 else if (el.key().find("metadata") != std::string::npos) {
74 metadata_ = durations_json[el.key()];
75 }
76 }
77 }
78
79 for (json &durations_json : durations_) {
80 durations_json.erase("metadata");
81 }
82
84}
85
86////////////////////////////////////////////////////////////////////////////////
87/// This method is the main functionality in this class. It does the heavy
88/// lifting of matching duration timestamps to tasks and partition evaluations.
89/// \param[in] analyzed_gradient Gradient to analyze. For example, setting to 1
90/// analyzes the first gradient (ordered by time)
91/// in the logs.
92std::unique_ptr<TH2I> HeatmapAnalyzer::analyze(int analyzed_gradient)
93{
94 int gradient_start_t = gradients_["master:gradient"][analyzed_gradient * 2 - 2];
95 int gradient_end_t = gradients_["master:gradient"][analyzed_gradient * 2 - 1];
96
97 std::unique_ptr<TH2I> total_matrix =
98 std::make_unique<TH2I>("heatmap", "", eval_partitions_names_.size(), 0, 1, tasks_names_.size(), 0, 1);
99
100 // loop over all logfiles stored in durations_
101 for (json &durations_json : durations_) {
102 // partial heatmap is the heatmap that will be filled in for the current durations logfile
103 std::unique_ptr<TH2I> partial_matrix =
104 std::make_unique<TH2I>("partial_heatmap", "", eval_partitions_names_.size(), 0, 1, tasks_names_.size(), 0, 1);
105
106 // remove unnecessary components (those that are out of range)
107 for (auto &&el : durations_json.items()) {
108 auto beg_interval =
109 std::upper_bound(durations_json[el.key()].begin(), durations_json[el.key()].end(), gradient_start_t);
110 auto end_interval =
111 std::upper_bound(durations_json[el.key()].begin(), durations_json[el.key()].end(), gradient_end_t);
112 durations_json[el.key()].erase(end_interval, durations_json[el.key()].end());
113 durations_json[el.key()].erase(durations_json[el.key()].begin(), beg_interval);
114 }
115
116 // loops over all evaluated partitions in logfile
117 for (std::string &eval_partition_name : eval_partitions_names_) {
118
119 // for this partition, loops over all durations, i.e. start and end times for partition evaluations, and for
120 // each tries to find the corresponding task
121 for (size_t idx = 0; idx < durations_json[eval_partition_name].size(); idx += 2) {
122 if (durations_json[eval_partition_name][idx + 1] > gradient_end_t ||
123 durations_json[eval_partition_name][idx] < gradient_start_t)
124 continue;
125 std::string task_name = findTaskForDuration(durations_json, durations_json[eval_partition_name][idx],
126 durations_json[eval_partition_name][idx + 1]);
127
128 if (task_name == "")
129 continue;
130
131 // add found combination of task, partition evaluation, and duration to partial matrix
132 int tasks_idx = find(tasks_names_.begin(), tasks_names_.end(), task_name) - tasks_names_.begin() + 1;
133 int eval_partitions_idx =
134 find(eval_partitions_names_.begin(), eval_partitions_names_.end(), eval_partition_name) -
135 eval_partitions_names_.begin() + 1;
136 partial_matrix->SetBinContent(eval_partitions_idx, tasks_idx,
137 durations_json[eval_partition_name][idx + 1].get<int>() -
138 durations_json[eval_partition_name][idx].get<int>());
139 }
140 }
141 // add all partial matrices to form one matrix with entire gradient evaluation information
142 total_matrix->Add(partial_matrix.get());
143 }
144
145 // do not need the legend in case heatmap is plotted
146 total_matrix->SetStats(0);
147
148 // set the axes labels on the heatmap matrix
149 TAxis *y = total_matrix->GetYaxis();
150 TAxis *x = total_matrix->GetXaxis();
151 for (std::size_t i = 0; i != tasks_names_.size(); ++i) {
152 y->SetBinLabel(i + 1, (metadata_[0][i].get<std::string>()).c_str());
153 y->ChangeLabel(i + 1, 30, 0.01, -1, -1, -1, "");
154 }
155 for (std::size_t i = 0; i != eval_partitions_names_.size(); ++i) {
156 x->SetBinLabel(i + 1, (eval_partitions_names_[i]).c_str());
157 x->ChangeLabel(i + 1, 30, -1, -1, -1, -1, "");
158 }
159 x->LabelsOption("v");
160
161 return total_matrix;
162}
163
164std::vector<std::string> const HeatmapAnalyzer::getTaskNames()
165{
166 return tasks_names_;
167};
168
169std::vector<std::string> const HeatmapAnalyzer::getPartitionNames()
170{
172};
173
// Hands the stored metadata_ member back to the caller.
// NOTE(review): the line carrying this function's signature is absent from this
// listing (the numbering jumps from 172 to 175), so its name and return type
// cannot be read here - confirm against the upstream file.
175{
176 return metadata_;
177};
178
179std::string HeatmapAnalyzer::findTaskForDuration(json durations, int start_t, int end_t)
180{
181 for (auto &&el : durations.items()) {
182 if (el.key().find("eval_partition") != std::string::npos)
183 continue;
184
185 for (size_t idx = 0; idx < durations[el.key()].size(); idx += 2) {
186 if (durations[el.key()][idx] <= start_t && durations[el.key()][idx + 1] >= end_t) {
187 return el.key();
188 }
189 }
190 }
191 return "";
192}
193
194void HeatmapAnalyzer::sortTaskNames(std::vector<std::string> &task_names)
195{
196 char const *digits = "0123456789";
197 std::vector<int> digit_vec;
198 std::vector<std::pair<int, std::string>> pair_vec;
199 for (auto &&el : task_names) {
200 std::size_t const n = el.find_first_of(digits);
201 pair_vec.push_back(std::make_pair(stoi(el.substr(n)), el));
202 }
203
204 std::sort(pair_vec.begin(), pair_vec.end());
205
206 for (size_t i = 0; i < task_names.size(); i++) {
207 task_names[i] = pair_vec[i].second;
208 }
209}
210
211} // namespace MultiProcess
212} // namespace RooFit
nlohmann::json json
#define f(i)
Definition RSha256.hxx:104
std::unique_ptr< TH2I > analyze(int analyzed_gradient)
This method is the main functionality in this class.
std::vector< std::string > const getPartitionNames()
std::vector< std::string > const getTaskNames()
HeatmapAnalyzer(std::string const &logs_dir)
HeatmapAnalyzer Constructor.
std::vector< std::string > tasks_names_
std::string findTaskForDuration(json durations, int start_t, int end_t)
void sortTaskNames(std::vector< std::string > &task_names)
std::vector< std::string > eval_partitions_names_
Class to manage histogram axis.
Definition TAxis.h:30
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
Definition TAxis.cxx:851
Mother of all ROOT objects.
Definition TObject.h:41
Describes an Operating System directory for the browser.
virtual TList * GetListOfFiles() const
Returns a TList of TSystemFile objects representing the contents of the directory.
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition Common.h:18
Definition file.py:1