This tutorial ingests climate data and creates a model with fields such as AverageTemperature. It then uses RDataFrame to process and filter the climate data for the average temperature per city by season. It does the same for the average temperature per city for the periods 1993-2002 and 2003-2013. Finally, the tutorial visualizes the processed data through histograms.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <sstream>
#include <stdexcept>
#include <utility>
#include <chrono>
// Clock used by Ingest() to time the conversion (see the "Processing Time" output there).
using Clock = std::chrono::high_resolution_clock;
// NOTE(review): the header of this function is not visible here; from the call sites in Analyze()
// this looks like the body of GetDrawableHist(h) -- confirm. No return statement is visible either,
// although callers use the returned value.
{
// Clone the histogram that h refers to and hand the clone to a shared_ptr<TH1D>.
auto result = std::shared_ptr<TH1D>(
static_cast<TH1D *
>(
h.GetPtr()->Clone()));
// Presumably detaches the clone from any ROOT directory so that only the shared_ptr manages it -- TODO confirm.
result->SetDirectory(
nullptr);
}
// Location of the raw CSV input that Ingest() reads.
constexpr const char *kRawDataUrl = "http://root.cern./files/tutorials/GlobalLandTemperaturesByCity.csv";
// Name of the RNTuple file that Ingest() writes.
constexpr const char *kNTupleFileName = "GlobalLandTemperaturesByCity.root";
/// Convert the raw CSV at kRawDataUrl into an RNTuple named "GlobalTempData" in kNTupleFileName.
///
/// Each input line is parsed with sscanf after its commas have been replaced by spaces. Lines
/// that do not yield all nine values are counted as skipped and not written. Because the format
/// string uses whitespace-delimited %s conversions and the literal suffixes 'N' and 'E', lines
/// whose text fields contain spaces or whose coordinates carry other suffixes fail to match and
/// are skipped as well.
///
/// \throws std::runtime_error if an input line is kMaxCharsPerLine characters or longer.
void Ingest()
{
   int nRecords = 0;
   int nSkipped = 0;
   std::cout << "Converting " << kRawDataUrl << " to " << kNTupleFileName << std::endl;

   // One field per stored value. The pointers returned by MakeField are the locations that
   // sscanf fills below before every ntuple->Fill().
   auto model = RNTupleModel::Create();
   auto fieldYear = model->MakeField<std::uint32_t>("Year");
   auto fieldMonth = model->MakeField<std::uint32_t>("Month");
   auto fieldDay = model->MakeField<std::uint32_t>("Day");
   auto fieldAvgTemp = model->MakeField<float>("AverageTemperature");
   auto fieldTempUncrty = model->MakeField<float>("AverageTemperatureUncertainty");
   auto fieldCity = model->MakeField<std::string>("City");
   auto fieldCountry = model->MakeField<std::string>("Country");
   auto fieldLat = model->MakeField<float>("Latitude");
   auto fieldLong = model->MakeField<float>("Longitude");
   auto ntuple = RNTupleWriter::Recreate(std::move(model), "GlobalTempData", kNTupleFileName);

   // NOTE(review): `options` is not declared in the visible part of this function -- confirm
   // where it is defined.
   auto file = RRawFile::Create(kRawDataUrl, options);

   std::string record;
   constexpr std::size_t kMaxCharsPerLine = 128;
   while (file->Readln(record)) {
      // Rejecting long lines up front guarantees that the %s conversions below cannot write
      // past the end of the kMaxCharsPerLine-sized buffers.
      if (record.length() >= kMaxCharsPerLine)
         throw std::runtime_error("record too long: " + record);

      std::replace(record.begin(), record.end(), ',', ' ');

      // The published CSV layout is
      //   dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
      // so the first %s receives the city and the second one the country.
      char city[kMaxCharsPerLine];
      char country[kMaxCharsPerLine];
      const int nFields =
         std::sscanf(record.c_str(), "%u-%u-%u %f %f %s %s %fN %fE", fieldYear.get(), fieldMonth.get(),
                     fieldDay.get(), fieldAvgTemp.get(), fieldTempUncrty.get(), city, country, fieldLat.get(),
                     fieldLong.get());
      if (nFields != 9) {
         nSkipped++;
         continue;
      }
      *fieldCity = city;
      *fieldCountry = country;
      ntuple->Fill();

      if (++nRecords % 1000000 == 0)
         std::cout << " ... converted " << nRecords << " records" << std::endl;
   }

   std::cout << nSkipped << " records skipped" << std::endl;
   std::cout << nRecords << " records processed" << std::endl;

   // NOTE(review): `t1` (the start time) is not declared in the visible part of this function --
   // confirm where it is taken.
   auto t2 = Clock::now();
   std::cout << std::endl
             << "Processing Time: " << std::chrono::duration_cast<std::chrono::seconds>(t2 - t1).count()
             << " seconds\n"
             << std::endl;
}
/// Analyze the temperature data with RDataFrame: print a preview, the minimum and maximum
/// average temperature, the number of entries per season and per period (1993-2002, 2003-2013),
/// and prepare per-season and per-period histograms together with two canvases and legends.
///
/// All results are booked first and only dereferenced afterwards. Dereferencing an RDataFrame
/// result runs the event loop for whatever has been booked up to that point, so reading a result
/// before the remaining actions are booked would force additional passes over the data.
void Analyze()
{
   // NOTE(review): `df` is not declared in the visible part of this function -- confirm where the
   // data frame is created.
   df.Display()->Print();

   auto min_value = df.Min("AverageTemperature");
   auto max_value = df.Max("AverageTemperature");

   // Season selection by month number.
   auto fnWinter = [](int month) { return month == 12 || month == 1 || month == 2; };
   auto fnSpring = [](int month) { return month == 3 || month == 4 || month == 5; };
   auto fnSummer = [](int month) { return month == 6 || month == 7 || month == 8; };
   auto fnFall = [](int month) { return month == 9 || month == 10 || month == 11; };

   auto dfWinter = df.Filter(fnWinter, {"Month"});
   auto dfSpring = df.Filter(fnSpring, {"Month"});
   auto dfSummer = df.Filter(fnSummer, {"Month"});
   auto dfFall = df.Filter(fnFall, {"Month"});

   auto winterCount = dfWinter.Count();
   auto springCount = dfSpring.Count();
   auto summerCount = dfSummer.Count();
   auto fallCount = dfFall.Count();

   // Period selection by year (both bounds inclusive).
   auto fn1993_to_2002 = [](int year) { return year >= 1993 && year <= 2002; };
   auto fn2003_to_2013 = [](int year) { return year >= 2003 && year <= 2013; };

   auto df1993_to_2002 = df.Filter(fn1993_to_2002, {"Year"});
   auto df2003_to_2013 = df.Filter(fn2003_to_2013, {"Year"});

   // Keep these as lazy results: they are dereferenced only when printed, after the
   // histograms below have been booked as well.
   auto decade_1993_to_2002_Count = df1993_to_2002.Count();
   auto decade_2003_to_2013_Count = df2003_to_2013.Count();

   auto fallHistResultPtr =
      dfFall.Histo1D({"Fall Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
   auto winterHistResultPtr =
      dfWinter.Histo1D({"Winter Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
   auto springHistResultPtr =
      dfSpring.Histo1D({"Spring Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
   auto summerHistResultPtr =
      dfSummer.Histo1D({"Summer Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");

   auto hist_1993_to_2002_ResultPtr = df1993_to_2002.Histo1D(
      {"1993_to_2002 Average Temp", "Average Temperature: 1993_to_2002 vs. 2003_to_2013", 100, -40, 40},
      "AverageTemperature");
   auto hist_2003_to_2013_ResultPtr = df2003_to_2013.Histo1D(
      {"2003_to_2013 Average Temp", "Average Temperature: 1993_to_2002 vs. 2003_to_2013", 100, -40, 40},
      "AverageTemperature");

   // Everything is booked; reading the results from here on does not require re-booking.
   std::cout << std::endl << "The Minimum temperature is: " << *min_value << std::endl;
   std::cout << "The Maximum temperature is: " << *max_value << std::endl;

   std::cout << std::endl << "The count for Winter: " << *winterCount << std::endl;
   std::cout << "The count for Spring: " << *springCount << std::endl;
   std::cout << "The count for Summer: " << *summerCount << std::endl;
   std::cout << "The count for Fall: " << *fallCount << std::endl;

   std::cout << std::endl << "The count for 1993_to_2002: " << *decade_1993_to_2002_Count << std::endl;
   std::cout << "The count for 2003_to_2013: " << *decade_2003_to_2013_Count << std::endl;

   // Independent copies of the seasonal histograms, styled for drawing.
   auto fallHist = GetDrawableHist(fallHistResultPtr);
   auto winterHist = GetDrawableHist(winterHistResultPtr);
   auto springHist = GetDrawableHist(springHistResultPtr);
   auto summerHist = GetDrawableHist(summerHistResultPtr);

   fallHist->SetLineWidth(6);

   winterHist->SetLineColor(kBlue);
   winterHist->SetLineWidth(6);

   springHist->SetLineColor(kGreen);
   springHist->SetLineWidth(6);

   summerHist->SetLineColor(kRed);
   summerHist->SetLineWidth(6);

   // Independent copies of the two period histograms, styled for drawing.
   auto hist_1993_to_2002 = GetDrawableHist(hist_1993_to_2002_ResultPtr);
   auto hist_2003_to_2013 = GetDrawableHist(hist_2003_to_2013_ResultPtr);

   hist_1993_to_2002->SetLineColor(kViolet);
   hist_1993_to_2002->SetLineWidth(6);

   hist_2003_to_2013->SetLineColor(kSpring);
   hist_2003_to_2013->SetLineWidth(6);

   // NOTE(review): the visible code builds the canvases and legends but contains no draw calls
   // for the histograms or legends -- confirm whether those lines are missing.
   auto canvas = RCanvas::Create("Average Temperature by Season");
   auto legend = std::make_shared<TLegend>(0.15, 0.65, 0.53, 0.85);
   legend->AddEntry(fallHist.get(), "fall", "l");
   legend->AddEntry(winterHist.get(), "winter", "l");
   legend->AddEntry(springHist.get(), "spring", "l");
   legend->AddEntry(summerHist.get(), "summer", "l");
   canvas->Show();

   auto canvas2 = RCanvas::Create("Average Temperature: 1993_to_2002 vs. 2003_to_2013");
   auto legend2 = std::make_shared<TLegend>(0.1, 0.7, 0.48, 0.9);
   legend2->AddEntry(hist_1993_to_2002.get(), "1993_to_2002", "l");
   legend2->AddEntry(hist_2003_to_2013.get(), "2003_to_2013", "l");
   canvas2->Show();
}
void ntpl011_global_temperatures()
{
Ingest();
}
Analyze();
}
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
R__EXTERN TSystem * gSystem
Provides v7 drawing facilities for TObject types (TGraph, TH1, TH2, etc).
The RRawFile provides read-only access to local and remote files.
Smart pointer for the return type of actions.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, ...
1-D histogram with a double per channel (see TH1 documentation)
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
On construction, an ROptions parameter can customize the RRawFile behavior.
size_t fBlockSize
Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering.