forked from Vindaar/TimepixAnalysis
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprintXyDataset.nim
More file actions
135 lines (125 loc) · 4.88 KB
/
printXyDataset.nim
File metadata and controls
135 lines (125 loc) · 4.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import nimhdf5, cligen, seqmath
import std / [strformat, os, stats, sequtils, sets, algorithm, strutils]
import ingrid / tos_helpers
type
## A generic cut on input data using dset & low / high values
GenericCut* = tuple[dset: string, lower, upper: float]
Stats = object
min: float
max: float
mean: float
sum: float
proc readIndices(h5f: H5File, path: string, cuts: seq[GenericCut]): seq[int] =
## get the correct indices we need to read for the data `dset` such that it matches
## the given cuts. Cuts are only allowed from datasets at the same level as `path`
## and must be readable as `float`.
# 1. get `dset`
let dset = h5f[path.dset_str]
# 2. get number of elements from shape of dset
let num = dset.shape[0]
# 3. create full set of indices from shape (i.e. all data)
var idxs = toSeq(0 ..< num).toSet
# 4. apply filters for each cut
for cut in cuts:
# 4.1. read data for this cut
let data = h5f[(path / cut.dset), float] # read as float
for i in toSeq(idxs):
let x = data[i]
if x < cut.lower or x > cut.upper:
idxs.excl i
result = toSeq(idxs).sorted
proc singleRun(h5f: H5File, path: string,
head: int, verbose: bool, cuts: seq[GenericCut] = @[]): Stats =
let h5dset = h5f[path.dset_str]
let idxs = readIndices(h5f, path, cuts)
withDset(h5dset):
var dsetL = idxs.mapIt(dset[it])
if head > 0:
dsetL = dsetL[0 ..< head]
if verbose:
echo dsetL
when typeof(dsetL[0]) is SomeNumber:
let dsetF = dsetL.mapIt(it.float)
result = Stats(min: dsetF.min, max: dsetF.max,
mean: dsetF.mean, sum: dsetF.sum)
echo "Min: ", result.min
echo "Max: ", result.max
echo "Mean: ", result.mean
echo "Sum: ", result.sum
elif typeof(dsetL[0]) is seq:
when typeof(dsetL[0][0]) is SomeNumber:
let dsetF = dsetL.flatten.mapIt(it.float)
result = Stats(min: dsetF.min, max: dsetF.max,
mean: dsetF.mean, sum: dsetF.sum)
echo "Min: ", result.min
echo "Max: ", result.max
echo "Mean: ", result.mean
echo "Sum: ", result.sum
else:
echo "No aggregates for type: ", typeof(dset)
proc getPath(base: string, run: int, chip: int, dset: string, global: bool): string =
result = ""
if not global:
if chip < 0:
quit("For chip related datasets, must hand a chip number!")
result = &"{base}{run}/chip_{chip}/{dset}"
else:
result = &"{base}{run}/{dset}"
proc main(fname: string, dset:string, global = false,
run = -1, chip = -1,
reco = false, likelihood = false, head = 0, verbose = false,
cuts: seq[GenericCut] = @[]) =
let h5f = H5open(fname, "r")
var path = rawDataBase()
if reco:
path = recoBase()
if likelihood:
path = likelihoodBase()
echo "Dataset: ", dset
if run >= 0:
let dsetPath = getPath(path, run, chip, dset, global)
discard singleRun(h5f, dsetPath, head, verbose, cuts)
else:
var stats = newSeq[Stats]()
for (num, grp) in runs(h5f, path):
echo "Run: ", num
echo "--------------------------------------------------"
let dsetPath = getPath(path, num, chip, dset, global)
stats.add singleRun(h5f, dsetPath, head, verbose, cuts)
echo "=================================================="
echo "Combined stats:"
echo "=================================================="
echo "Min: ", stats.mapIt(it.min).min
echo "Max: ", stats.mapIt(it.max).max
echo "Mean: ", stats.mapIt(it.mean).mean
echo "Sum: ", stats.mapIt(it.sum).sum
when isMainModule:
import cligen/argcvt
proc argParse(dst: var GenericCut, dfl: GenericCut,
a: var ArgcvtParams): bool =
echo "Parsing ", a.val
var vals = a.val.strip(chars = {'(', ')'}).split(',')
if vals.len != 3: return false
try:
echo "vals: ", vals
dst = (dset: vals[0].strip(chars = {'"'}),
lower: parseFloat(vals[1].strip),
upper: parseFloat(vals[2].strip))
echo "Parsed it to ", dst
result = true
except:
result = false
proc argHelp*(dfl: GenericCut; a: var ArgcvtParams): seq[string] =
result = @[ a.argKeys, "(dset: string, lower, upper: float)", $dfl ]
dispatch(main, help = {
"fname" : "The input file name to print data from",
"chip" : "Chip to read data from in the file",
"run" : "Run to read data from in the file. If not given all runs (and summary) will be printed.",
"dset" : "The dataset to read",
"global" : "If set will read a 'global' (i.e. in root of run group) dataset.",
"reco" : "If true will read from `/reconstruction/` path",
"likelihood" : "If true will read from `/likelihood/` path",
"head" : "Only print `head` first elements",
"cuts" : "Allows to cut data used for event display. Only shows events of data passing these cuts.",
"verbose" : "Print the whole dataset (not just summaries)"
})