#include <algorithm>
#include <cassert>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <sstream>
#include <stdexcept>
#include <utility>
#include <chrono>
// Clock used to time the ingestion step. steady_clock is monotonic, so the
// measured interval cannot be distorted by wall-clock adjustments, whereas
// high_resolution_clock may be an alias of the adjustable system_clock.
using Clock = std::chrono::steady_clock;
{
auto result = std::shared_ptr<TH1D>(
static_cast<TH1D *
>(
h.GetPtr()->Clone()));
result->SetDirectory(
nullptr);
}
// Location of the raw CSV input that Ingest() reads.
constexpr char const *kRawDataUrl{"http://root.cern./files/tutorials/GlobalLandTemperaturesByCity.csv"};
// Name of the ROOT file that Ingest() writes the RNTuple to.
constexpr char const *kNTupleFileName{"GlobalLandTemperaturesByCity.root"};
/// Converts the CSV data at kRawDataUrl into the RNTuple "GlobalTempData" stored
/// in kNTupleFileName and reports record counts and the elapsed time on stdout.
///
/// After commas are replaced by blanks, every line must match
///   <year>-<month>-<day> <avg temp> <uncertainty> <word> <word> <lat>N <long>E
/// Lines that do not yield all nine values (for example because a text column
/// contains whitespace, or the N/E suffixes are absent) are counted and skipped.
///
/// \throws std::runtime_error if a line has kMaxCharsPerLine or more characters
void Ingest()
{
   // Start of the measured interval; the elapsed time is printed at the end.
   const auto t1 = Clock::now();

   int nRecords = 0;
   int nSkipped = 0;
   std::cout << "Converting " << kRawDataUrl << " to " << kNTupleFileName << std::endl;

   // The model owns the schema; each MakeField call returns a pointer to the
   // value that is filled in before every Fill().
   auto model = RNTupleModel::Create();
   auto fieldYear = model->MakeField<std::uint32_t>("Year");
   auto fieldMonth = model->MakeField<std::uint32_t>("Month");
   auto fieldDay = model->MakeField<std::uint32_t>("Day");
   auto fieldAvgTemp = model->MakeField<float>("AverageTemperature");
   auto fieldTempUncrty = model->MakeField<float>("AverageTemperatureUncertainty");
   auto fieldCity = model->MakeField<std::string>("City");
   auto fieldCountry = model->MakeField<std::string>("Country");
   auto fieldLat = model->MakeField<float>("Latitude");
   auto fieldLong = model->MakeField<float>("Longitude");

   auto ntuple = RNTupleWriter::Recreate(std::move(model), "GlobalTempData", kNTupleFileName);

   // Default-constructed options keep the standard RRawFile behaviour.
   // NOTE(review): confirm whether a custom setting (e.g. fBlockSize) was intended here.
   RRawFile::ROptions options;
   auto file = RRawFile::Create(kRawDataUrl, options);

   std::string record;
   constexpr int kMaxCharsPerLine = 128;
   while (file->Readln(record)) {
      // The length guard also bounds every %s token below, so the fixed-size
      // buffers cannot overflow.
      if (record.length() >= kMaxCharsPerLine)
         throw std::runtime_error("record too long: " + record);

      // Turn the CSV separators into whitespace so sscanf can tokenize the line.
      std::replace(record.begin(), record.end(), ',', ' ');
      char country[kMaxCharsPerLine];
      char city[kMaxCharsPerLine];
      // NOTE(review): the first text token is stored as Country and the second as
      // City -- confirm this matches the column order of the input file.
      const int nFields = std::sscanf(record.c_str(), "%u-%u-%u %f %f %s %s %fN %fE", fieldYear.get(),
                                      fieldMonth.get(), fieldDay.get(), fieldAvgTemp.get(), fieldTempUncrty.get(),
                                      country, city, fieldLat.get(), fieldLong.get());
      if (nFields != 9) {
         nSkipped++;
         continue;
      }
      *fieldCountry = country;
      *fieldCity = city;
      ntuple->Fill();

      // Progress indicator every million converted records.
      if (++nRecords % 1000000 == 0)
         std::cout << " ... converted " << nRecords << " records" << std::endl;
   }

   std::cout << nSkipped << " records skipped" << std::endl;
   std::cout << nRecords << " records processed" << std::endl;

   const auto t2 = Clock::now();
   std::cout << std::endl
             << "Processing Time: " << std::chrono::duration_cast<std::chrono::seconds>(t2 - t1).count()
             << " seconds\n"
             << std::endl;
}
void Analyze()
{
df.Display()->Print();
auto min_value = df.Min("AverageTemperature");
auto max_value = df.Max("AverageTemperature");
auto fnWinter = [](int month) { return month == 12 || month == 1 || month == 2; };
auto fnSpring = [](int month) { return month == 3 || month == 4 || month == 5; };
auto fnSummer = [](int month) { return month == 6 || month == 7 || month == 8; };
auto fnFall = [](int month) { return month == 9 || month == 10 || month == 11; };
auto dfWinter = df.Filter(fnWinter, {"Month"});
auto dfSpring = df.Filter(fnSpring, {"Month"});
auto dfSummer = df.Filter(fnSummer, {"Month"});
auto dfFall = df.Filter(fnFall, {"Month"});
auto winterCount = dfWinter.Count();
auto springCount = dfSpring.Count();
auto summerCount = dfSummer.Count();
auto fallCount = dfFall.Count();
auto fn1993_to_2002 = [](int year) { return year >= 1993 && year <= 2002; };
auto fn2003_to_2013 = [](int year) { return year >= 2003 && year <= 2013; };
auto df1993_to_2002 = df.Filter(fn1993_to_2002, {"Year"});
auto df2003_to_2013 = df.Filter(fn2003_to_2013, {"Year"});
auto decade_1993_to_2002_Count = *df1993_to_2002.Count();
auto decade_2003_to_2013_Count = *df2003_to_2013.Count();
auto fallHistResultPtr =
dfFall.Histo1D({"Fall Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
auto winterHistResultPtr =
dfWinter.Histo1D({"Winter Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
auto springHistResultPtr =
dfSpring.Histo1D({"Spring Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
auto summerHistResultPtr =
dfSummer.Histo1D({"Summer Average Temp", "Average Temperature by Season", 100, -40, 40}, "AverageTemperature");
auto hist_1993_to_2002_ResultPtr = df1993_to_2002.Histo1D(
{"1993_to_2002 Average Temp", "Average Temperature: 1993_to_2002 vs. 2003_to_2013", 100, -40, 40},
"AverageTemperature");
auto hist_2003_to_2013_ResultPtr = df2003_to_2013.Histo1D(
{"2003_to_2013 Average Temp", "Average Temperature: 1993_to_2002 vs. 2003_to_2013", 100, -40, 40},
"AverageTemperature");
std::cout << std::endl << "The Minimum temperature is: " << *min_value << std::endl;
std::cout << "The Maximum temperature is: " << *max_value << std::endl;
std::cout << std::endl << "The count for Winter: " << *winterCount << std::endl;
std::cout << "The count for Spring: " << *springCount << std::endl;
std::cout << "The count for Summer: " << *summerCount << std::endl;
std::cout << "The count for Fall: " << *fallCount << std::endl;
std::cout << std::endl << "The count for 1993_to_2002: " << decade_1993_to_2002_Count << std::endl;
std::cout << "The count for 2003_to_2013: " << decade_2003_to_2013_Count << std::endl;
auto fallHist = GetDrawableHist(fallHistResultPtr);
auto winterHist = GetDrawableHist(winterHistResultPtr);
auto springHist = GetDrawableHist(springHistResultPtr);
auto summerHist = GetDrawableHist(summerHistResultPtr);
fallHist->SetLineWidth(6);
winterHist->SetLineColor(
kBlue);
winterHist->SetLineWidth(6);
springHist->SetLineColor(
kGreen);
springHist->SetLineWidth(6);
summerHist->SetLineColor(
kRed);
summerHist->SetLineWidth(6);
auto hist_1993_to_2002 = GetDrawableHist(hist_1993_to_2002_ResultPtr);
auto hist_2003_to_2013 = GetDrawableHist(hist_2003_to_2013_ResultPtr);
hist_1993_to_2002->SetLineColor(
kViolet);
hist_1993_to_2002->SetLineWidth(6);
hist_2003_to_2013->SetLineColor(
kSpring);
hist_2003_to_2013->SetLineWidth(6);
auto canvas = RCanvas::Create("Average Temperature by Season");
auto legend = std::make_shared<TLegend>(0.15, 0.65, 0.53, 0.85);
legend->AddEntry(fallHist.get(), "fall", "l");
legend->AddEntry(winterHist.get(), "winter", "l");
legend->AddEntry(springHist.get(), "spring", "l");
legend->AddEntry(summerHist.get(), "summer", "l");
canvas->Show();
auto canvas2 = RCanvas::Create("Average Temperature: 1993_to_2002 vs. 2003_to_2013");
auto legend2 = std::make_shared<TLegend>(0.1, 0.7, 0.48, 0.9);
legend2->AddEntry(hist_1993_to_2002.get(), "1993_to_2002", "l");
legend2->AddEntry(hist_2003_to_2013.get(), "2003_to_2013", "l");
canvas2->Show();
}
void ntpl011_global_temperatures()
{
Ingest();
}
Analyze();
}
// ---------------------------------------------------------------------------
// Reference notes (API briefs carried over from the generated documentation).
// ---------------------------------------------------------------------------
// Unattributed identifier residue:
//   Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
//
// R__EXTERN TSystem * gSystem
//
// Provides v7 drawing facilities for TObject types (TGraph, TH1, TH2, etc).
// The RRawFile provides read-only access to local and remote files.
// Smart pointer for the return type of actions.
// ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, ...
// 1-D histogram with a double per channel (see TH1 documentation)
//
// virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
//   Returns FALSE if one can access a file using the specified access mode.
//
// On construction, an ROptions parameter can customize the RRawFile behavior.
//
// size_t fBlockSize
//   Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering.