Logo ROOT  
Reference Guide
parse_CSV_file_with_TTree_ReadStream.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_pyroot
3## This function provides an example of how one might
4## massage a csv data file to read into a ROOT TTree
5## via TTree::ReadStream. This could be useful if the
6## data read out from some DAQ program doesn't 'quite'
7## match the formatting expected by ROOT (e.g. comma-
8## separated, tab-separated with white-space strings,
9## headers not matching the expected format, etc.)
10##
11## This example is shipped with a data
12## file that looks like:
13##
14## ~~~{.cpp}
15## Date/Time Synchro Capacity Temp.Cold Head Temp. Electrode HV Supply Voltage Electrode 1 Electrode 2 Electrode 3 Electrode 4
16## # Example data to read out. Some data have oddities that might need to
17## # dealt with, including the 'NaN' in Electrode 4 and the empty string in Date/Time (last row)
18## 08112010.160622 7 5.719000E-10 8.790500 24.237700 -0.008332 0 0 0 0
19## 8112010.160626 7 5.710000E-10 8.828400 24.237500 -0.008818 0 0 0 0
20## 08112010.160626 7 5.719000E-10 8.828400 24.237500 -0.008818 0 0 0 0
21## 08112010.160627 7 5.719000E-10 9.014300 24.237400 -0.028564 0 0 0 NaN
22## 08112010.160627 7 5.711000E-10 8.786000 24.237400 -0.008818 0 0 0 0
23## 08112010.160628 7 5.702000E-10 8.786000 24.237400 -0.009141 0 0 0 0
24## 08112010.160633 7 5.710000E-10 9.016200 24.237200 -0.008818 0 0 0 0
25## 7 5.710000E-10 8.903400 24.237200 -0.008818 0 0 0 0
26## ~~~
27##
28## These data require some massaging, including:
29##
30## - Date/Time has a blank ('') entry that must be handled
31## - The headers are not in the correct format
32## - Tab-separated entries with additional white space
33## - NaN entries
34##
35## \macro_code
36##
37## \author Michael Marino
38from __future__ import print_function
39
40import ROOT
41import sys
42import os
43
def parse_CSV_file_with_TTree_ReadStream(tree_name, afile):
    """Convert a tab-separated data file into a ROOT file holding one TTree.

    The first line of ``afile`` is taken as the header row. Each header name
    is translated into a branch name and type through the mapping tables
    defined below, giving a branch descriptor such as
    ``Datetime/C:Synchro/I:...``. The remaining lines are cleaned up and
    passed to ``TTree::ReadStream`` as a single in-memory stream. The tree is
    written to a ROOT file named after ``afile`` with its extension replaced
    by ``.root``; the file is opened with 'recreate'.

    Parameters:
      tree_name -- name (also used as the title) of the TTree to create.
      afile     -- path of the tab-separated input file.

    Raises:
      KeyError -- if a header name has no entry in the header mapping table
                  (this includes an empty input file, whose header is '').
    """
    ROOT.gROOT.SetBatch()

    # The mapping dictionary defines the proper branch names and types
    # given a header name.
    header_mapping_dictionary = {
        'Date/Time'         : ('Datetime'       , str)   ,
        'Synchro'           : ('Synchro'        , int)   ,
        'Capacity'          : ('Capacitance'    , float) ,
        'Temp.Cold Head'    : ('TempColdHead'   , float) ,
        'Temp. Electrode'   : ('TempElectrode'  , float) ,
        'HV Supply Voltage' : ('HVSupplyVoltage', float) ,
        'Electrode 1'       : ('Electrode1'     , int)   ,
        'Electrode 2'       : ('Electrode2'     , int)   ,
        'Electrode 3'       : ('Electrode3'     , int)   ,
        'Electrode 4'       : ('Electrode4'     , int)   ,
    }

    # Python type -> type code used in the branch descriptor.
    type_mapping_dictionary = {
        str   : 'C',
        int   : 'I',
        float : 'F',
    }

    # Read the whole file once and close it right away; the header row and
    # the data rows are both taken from this single read.
    with open(afile) as input_file:
        file_lines = input_file.readlines()

    # The data are separated using tabs, but some of the header names
    # include spaces and are not in the format ROOT expects, e.g.
    #
    #   FloatData/F:StringData/C:IntData/I
    #
    # so the header row is translated through the dictionaries above.
    # An empty file yields the header [''] and therefore a KeyError below.
    header_line = file_lines[0] if file_lines else ''
    header_row = header_line.strip().split('\t')

    branch_fields = []
    for header_name in header_row:
        branch_name, branch_type = header_mapping_dictionary[header_name]
        branch_fields.append(
            branch_name + '/' + type_mapping_dictionary[branch_type])
    branch_descriptor = ':'.join(branch_fields)

    # Clean the data entries: skip the first (header) row, blank lines and
    # lines beginning with '#'. Values that are empty or contain a space are
    # replaced by 'empty', since ROOT doesn't differentiate between
    # different types of white space. The line terminator is stripped
    # *before* splitting so that an empty last column is tagged as well and
    # a stray '\r' does not end up attached to the last value.
    cleaned_lines = []
    for raw_line in file_lines[1:]:
        line = raw_line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        cleaned_lines.append('\t'.join(
            'empty' if (val == '' or ' ' in val) else val
            for val in line.split('\t')))

    # Join the rows into one large string, terminating every row (including
    # the last one) with a newline, and set NaN entries to 0.0.
    file_as_string = ''.join(
        row + '\n' for row in cleaned_lines).replace('NaN', str(0.0))

    # Handling the input and output names. Using the same
    # base name for the ROOT output file.
    output_ROOT_file_name = os.path.splitext(afile)[0] + '.root'
    output_file = ROOT.TFile(output_ROOT_file_name, 'recreate')
    print("Outputting %s -> %s" % (afile, output_ROOT_file_name))

    try:
        output_tree = ROOT.TTree(tree_name, tree_name)

        # Create an istringstream to pass into ReadStream, then read it.
        istring = ROOT.istringstream(file_as_string)
        output_tree.ReadStream(istring, branch_descriptor)

        output_file.cd()
        output_tree.Write()
    finally:
        # Close the output file explicitly, even if reading or writing fails.
        output_file.Close()
121
122
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the data file
    # to convert; without it, print the usage line and exit with status 1.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: %s file_to_parse.dat" % sys.argv[0])
        sys.exit(1)
    parse_CSV_file_with_TTree_ReadStream("example_tree", cli_args[0])
128