It ingests climate data and creates a model with fields like AverageTemperature. Then it uses RDataframe to process and filter the climate data for average temperature per city by season. Then it does the same for average temperature per city for the years between 1993-2002, and 2003-2013. Finally, the tutorial visualizes this processed data through histograms.
#include <ROOT/RDataFrame.hxx>
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <sstream>
#include <stdexcept>
#include <utility>
#include <chrono>
using Clock = std::chrono::high_resolution_clock;
auto result = std::shared_ptr<TH1D>(
static_cast<TH1D *
>(
h.GetPtr()->Clone()));
result->SetDirectory(nullptr);
return result;
}
constexpr const char *kRawDataUrl = "http://root.cern./files/tutorials/GlobalLandTemperaturesByCity.csv";
constexpr const char *kNTupleFileName = "GlobalLandTemperaturesByCity.root";
void Ingest() {
int nRecords = 0;
int nSkipped = 0;
std::cout << "Converting " << kRawDataUrl << " to " << kNTupleFileName << std::endl;
auto model = RNTupleModel::Create();
auto fieldYear = model->MakeField<std::uint32_t>("Year");
auto fieldMonth = model->MakeField<std::uint32_t>("Month");
auto fieldDay = model->MakeField<std::uint32_t>("Day");
auto fieldAvgTemp = model->MakeField<float>("AverageTemperature");
auto fieldTempUncrty = model->MakeField<float>("AverageTemperatureUncertainty");
auto fieldCity = model->MakeField<std::string>("City");
auto fieldCountry = model->MakeField<std::string>("Country");
auto fieldLat = model->MakeField<float>("Latitude");
auto fieldLong = model->MakeField<float>("Longitude");
auto ntuple = RNTupleWriter::Recreate(std::move(model), "GlobalTempData", kNTupleFileName, options);
auto file = RRawFile::Create(kRawDataUrl);
std::string record;
constexpr int kMaxCharsPerLine = 128;
while (
file->Readln(record)) {
if (record.length() >= kMaxCharsPerLine)
throw std::runtime_error("record too long: " + record);
std::replace(record.begin(), record.end(), ',', ' ');
char country[kMaxCharsPerLine];
char city[kMaxCharsPerLine];
int nFields = sscanf(record.c_str(), "%u-%u-%u %f %f %s %s %fN %fE",
fieldYear.get(), fieldMonth.get(), fieldDay.get(),
fieldAvgTemp.get(), fieldTempUncrty.get(), country, city,
fieldLat.get(), fieldLong.get());
if (nFields != 9) {
nSkipped++;
continue;
}
*fieldCountry = country;
*fieldCity = city;
ntuple->Fill();
if (++nRecords % 1000000 == 0)
std::cout << " ... converted " << nRecords << " records" << std::endl;
}
std::cout << nSkipped << " records skipped" << std::endl;
std::cout << nRecords << " records processed" << std::endl;
auto t2 = Clock::now();
std::cout << std::endl
<< "Processing Time: "
<< std::chrono::duration_cast<std::chrono::seconds>(t2 -
t1).count()
<< " seconds\n" << std::endl;
}
void Analyze() {
df.Display()->Print();
auto min_value = df.Min("AverageTemperature");
auto max_value = df.Max("AverageTemperature");
auto fnWinter = [](int month) { return month == 12 || month == 1 || month == 2; };
auto fnSpring = [](int month) { return month == 3 || month == 4 || month == 5; };
auto fnSummer = [](int month) { return month == 6 || month == 7 || month == 8; };
auto fnFall = [](int month) { return month == 9 || month == 10 || month == 11; };
auto dfWinter = df.Filter(fnWinter, {"Month"});
auto dfSpring = df.Filter(fnSpring, {"Month"});
auto dfSummer = df.Filter(fnSummer, {"Month"});
auto dfFall = df.Filter(fnFall, {"Month"});
auto winterCount = dfWinter.Count();
auto springCount = dfSpring.Count();
auto summerCount = dfSummer.Count();
auto fallCount = dfFall.Count();
auto fn1993_to_2002 = [](int year) { return year >= 1993 && year <= 2002; };
auto fn2003_to_2013 = [](int year) { return year >= 2003 && year <= 2013; };
auto df1993_to_2002 = df.Filter(fn1993_to_2002, {"Year"});
auto df2003_to_2013 = df.Filter(fn2003_to_2013, {"Year"});
auto decade_1993_to_2002_Count = *df1993_to_2002.Count();
auto decade_2003_to_2013_Count = *df2003_to_2013.Count();
auto fallHistResultPtr = dfFall.Histo1D({"Fall Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
auto winterHistResultPtr = dfWinter.Histo1D({"Winter Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
auto springHistResultPtr = dfSpring.Histo1D({"Spring Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
auto summerHistResultPtr = dfSummer.Histo1D({"Summer Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
auto hist_1993_to_2002_ResultPtr = df1993_to_2002.Histo1D({"1993_to_2002 Average Temp", "Average Temperature: 1993_to_2002 vs. 2003_to_2013", 100, -40, 40}, "AverageTemperature");
auto hist_2003_to_2013_ResultPtr = df2003_to_2013.Histo1D({"2003_to_2013 Average Temp", "Average Temperature: 1993_to_2002 vs. 2003_to_2013", 100, -40, 40}, "AverageTemperature");
std::cout << std::endl << "The Minimum temperature is: " << *min_value << std::endl;
std::cout << "The Maximum temperature is: " << *max_value << std::endl;
std::cout << std::endl << "The count for Winter: " << *winterCount<< std::endl;
std::cout << "The count for Spring: " << *springCount << std::endl;
std::cout << "The count for Summer: " << *summerCount << std::endl;
std::cout << "The count for Fall: " << *fallCount << std::endl;
std::cout << std::endl << "The count for 1993_to_2002: " << decade_1993_to_2002_Count << std::endl;
std::cout << "The count for 2003_to_2013: " <<decade_2003_to_2013_Count << std::endl;
auto fallHist = GetDrawableHist(fallHistResultPtr);
auto winterHist = GetDrawableHist(winterHistResultPtr);
auto springHist = GetDrawableHist(springHistResultPtr);
auto summerHist = GetDrawableHist(summerHistResultPtr);
fallHist->SetLineWidth(6);
winterHist->SetLineColor(
kBlue);
winterHist->SetLineWidth(6);
springHist->SetLineColor(
kGreen);
springHist->SetLineWidth(6);
summerHist->SetLineColor(
kRed);
summerHist->SetLineWidth(6);
auto hist_1993_to_2002 = GetDrawableHist(hist_1993_to_2002_ResultPtr);
auto hist_2003_to_2013 = GetDrawableHist(hist_2003_to_2013_ResultPtr);
hist_1993_to_2002->SetLineColor(
kViolet);
hist_1993_to_2002->SetLineWidth(6);
hist_2003_to_2013->SetLineColor(
kSpring);
hist_2003_to_2013->SetLineWidth(6);
auto canvas = RCanvas::Create("Average Temperature by Season");
auto legend = std::make_shared<TLegend>(0.15,0.65,0.53,0.85);
legend->AddEntry(fallHist.get(),"fall","l");
legend->AddEntry(winterHist.get(),"winter","l");
legend->AddEntry(springHist.get(),"spring","l");
legend->AddEntry(summerHist.get(),"summer","l");
canvas->Show();
auto canvas2 = RCanvas::Create("Average Temperature: 1993_to_2002 vs. 2003_to_2013");
auto legend2 = std::make_shared<TLegend>(0.1,0.7,0.48,0.9);
legend2->AddEntry(hist_1993_to_2002.get(),"1993_to_2002","l");
legend2->AddEntry(hist_2003_to_2013.get(),"2003_to_2013","l");
canvas2->Show();
}
void global_temperatures() {
Ingest();
}
Analyze();
}
#define R__LOAD_LIBRARY(LIBRARY)
R__EXTERN TSystem * gSystem
Common user-tunable settings for storing ntuples.
void SetCompression(int val)
Provides v7 drawing facilities for TObject types (TGraph, TH1, TH2, etc).
The RRawFile provides read-only access to local and remote files.
Smart pointer for the return type of actions.
1-D histogram with a double per channel (see TH1 documentation)}
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
RDataFrame MakeNTupleDataFrame(std::string_view ntupleName, std::string_view fileName)
@ kUseGeneralPurpose
Use the new recommended general-purpose setting; it is a best trade-off between compression ratio/dec...
NOTE: Until C++ runtime modules are universally used, we explicitly load the ntuple library. Otherwise triggering autoloading from the use of templated types would require an exhaustive enumeration of "all" template instances in the LinkDef file.