-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathhdf5db.cpp
264 lines (225 loc) · 7.14 KB
/
hdf5db.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
/*
* Copyright 2021-2023 Lawrence Livermore National Security, LLC and other
* AMSLib Project Developers
*
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "wf/basedb.hpp"
using namespace ams::db;
/**
 * @brief Open the 1-D dataset named `dName` under `group`, or create it when
 *        no link with that name exists.
 *
 * Existing dataset: it is opened and, if no elements have been accounted for
 * yet (totalElements == 0), totalElements is initialised from the first
 * dimension of the dataset's current extent.
 *
 * Missing dataset: an empty (extent 0), chunked dataset with an unlimited
 * maximum extent is created so it can be grown on every write.
 *
 * @param group    HDF5 location under which the dataset is looked up/created.
 * @param dName    Link name of the dataset.
 * @param dataType HDF5 datatype used when the dataset has to be created.
 * @param Chunk    Chunk size, in elements, of a newly created dataset.
 * @return Identifier of the opened or newly created dataset.
 */
hid_t hdf5DB::getDataSet(hid_t group,
                         std::string dName,
                         hid_t dataType,
                         const size_t Chunk)
{
  // Our datasets a.t.m are 1-D vectors
  const int nDims = 1;

  int exists = H5Lexists(group, dName.c_str(), H5P_DEFAULT);
  if (exists > 0) {
    hid_t dset = H5Dopen(group, dName.c_str(), H5P_DEFAULT);
    HDF5_ERROR(dset);

    // We are assuming symmetrical data sets a.t.m, so the size of the first
    // dataset we open is taken as the number of elements already stored.
    if (totalElements == 0) {
      hid_t dspace = H5Dget_space(dset);
      HDF5_ERROR(dspace);

      const int rank = H5Sget_simple_extent_ndims(dspace);
      HDF5_ERROR(rank);

      // A rank of 0 has no first dimension to read; leave totalElements at 0.
      if (rank > 0) {
        std::vector<hsize_t> extent(static_cast<size_t>(rank));
        const int queried =
            H5Sget_simple_extent_dims(dspace, extent.data(), NULL);
        HDF5_ERROR(queried);
        totalElements = extent[0];
      }

      // The dataspace handle is only needed for the query above.
      herr_t closeErr = H5Sclose(dspace);
      HDF5_ERROR(closeErr);
    }
    return dset;
  }

  // We always start from 0 ...
  hsize_t dims = 0;
  // ... and we will extend the data-set size, so we use the unlimited option
  hsize_t maxDims = H5S_UNLIMITED;
  hid_t fileSpace = H5Screate_simple(nDims, &dims, &maxDims);
  HDF5_ERROR(fileSpace);

  hid_t pList = H5Pcreate(H5P_DATASET_CREATE);
  HDF5_ERROR(pList);

  herr_t ec = H5Pset_layout(pList, H5D_CHUNKED);
  HDF5_ERROR(ec);

  // cDims impacts performance considerably.
  // TODO: Align this with the caching mechanism for this option to work
  // out.
  hsize_t cDims = Chunk;
  ec = H5Pset_chunk(pList, nDims, &cDims);
  HDF5_ERROR(ec);

  hid_t dset = H5Dcreate(group,
                         dName.c_str(),
                         dataType,
                         fileSpace,
                         H5P_DEFAULT,
                         pList,
                         H5P_DEFAULT);
  HDF5_ERROR(dset);

  H5Sclose(fileSpace);
  H5Pclose(pList);
  return dset;
}
/**
 * @brief Open or create every dataset this database writes to.
 *
 * One dataset per input ("input_<i>") and per output ("output_<i>") is
 * obtained through getDataSet() using the current HDType, and its identifier
 * is appended to HDIsets / HDOsets respectively. When storePredicate() is
 * true, a "predicate" dataset of type H5T_NATIVE_HBOOL is obtained as well
 * and stored in pSet.
 *
 * @param numElements Currently unused; kept so the interface is unchanged.
 * @param numIn       Number of input datasets.
 * @param numOut      Number of output datasets.
 */
void hdf5DB::createDataSets(size_t numElements,
                            const size_t numIn,
                            const size_t numOut)
{
  (void)numElements;

  // Index with size_t so the loop bound comparison is not signed/unsigned.
  for (size_t i = 0; i < numIn; ++i) {
    hid_t dSet =
        getDataSet(HFile, std::string("input_") + std::to_string(i), HDType);
    HDIsets.push_back(dSet);
  }

  for (size_t i = 0; i < numOut; ++i) {
    hid_t dSet =
        getDataSet(HFile, std::string("output_") + std::to_string(i), HDType);
    HDOsets.push_back(dSet);
  }

  if (storePredicate()) {
    pSet = getDataSet(HFile, "predicate", H5T_NATIVE_HBOOL);
  }
}
template <typename TypeValue>
void hdf5DB::writeDataToDataset(std::vector<hid_t>& dsets,
std::vector<TypeValue*>& data,
size_t numElements)
{
int index = 0;
for (auto* I : data) {
writeVecToDataset(dsets[index++],
static_cast<void*>(I),
numElements,
HDType);
}
}
/**
 * @brief Append `elements` values from `data` to the end of a 1-D dataset.
 *
 * The dataset is extended to totalElements + elements and the new values are
 * written into the range [totalElements, totalElements + elements).
 * totalElements itself is not modified here.
 *
 * @param dSet     Identifier of the dataset to append to.
 * @param data     Buffer holding `elements` values of type `DType`.
 * @param elements Number of values to append.
 * @param DType    HDF5 memory datatype describing `data`.
 */
void hdf5DB::writeVecToDataset(hid_t dSet,
                               void* data,
                               size_t elements,
                               hid_t DType)
{
  const int nDims = 1;

  // Memory dataspace describing the caller's buffer.
  hsize_t memDims = elements;
  hid_t memSpace = H5Screate_simple(nDims, &memDims, NULL);
  HDF5_ERROR(memSpace);

  // Grow the dataset so it can hold the appended elements.
  hsize_t newExtent = totalElements + elements;
  herr_t err = H5Dset_extent(dSet, &newExtent);
  HDF5_ERROR(err);

  // The file dataspace must be fetched after the extent change.
  hid_t fileSpace = H5Dget_space(dSet);
  HDF5_ERROR(fileSpace);

  // Data set starts at offset totalElements
  hsize_t start = totalElements;
  // And we append additional elements
  hsize_t count = elements;

  // Select hyperslab
  err = H5Sselect_hyperslab(
      fileSpace, H5S_SELECT_SET, &start, NULL, &count, NULL);
  HDF5_ERROR(err);

  err = H5Dwrite(dSet, DType, memSpace, fileSpace, H5P_DEFAULT, data);
  HDF5_ERROR(err);

  // Release both dataspaces; previously memSpace was never closed.
  H5Sclose(fileSpace);
  H5Sclose(memSpace);
}
/**
 * @brief Append `num_elements` samples of inputs/outputs (and optionally the
 *        predicate) to the database.
 *
 * HDType is set from TypeValue (double or float). On the first call, i.e.
 * while HDIsets is empty, the datasets are opened/created. Afterwards the
 * number of input and output buffers must match the number of datasets.
 * totalElements is advanced by `num_elements` once all writes are issued.
 *
 * @param num_elements Number of elements in every input/output buffer.
 * @param inputs       One buffer per input dimension.
 * @param outputs      One buffer per output dimension.
 * @param predicate    Buffer of `num_elements` flags; must not be null when
 *                     storePredicate() is true.
 */
template <typename TypeValue>
void hdf5DB::_store(size_t num_elements,
                    std::vector<TypeValue*>& inputs,
                    std::vector<TypeValue*>& outputs,
                    bool* predicate)
{
  CALIPER(CALI_MARK_BEGIN("STORE_HDF5");)
  if (isDouble<TypeValue>::default_value())
    HDType = H5T_NATIVE_DOUBLE;
  else
    HDType = H5T_NATIVE_FLOAT;

  CFATAL(HDF5DB,
         storePredicate() && predicate == nullptr,
         "DB Configured to store predicates, predicate is not provided")

  // All three counts are size_t, hence %zu.
  DBG(DB,
      "DB of type %s stores %zu elements of input/output dimensions (%zu, "
      "%zu)",
      type().c_str(),
      num_elements,
      inputs.size(),
      outputs.size())

  const size_t num_in = inputs.size();
  const size_t num_out = outputs.size();

  // Datasets are set up lazily on the first store.
  if (HDIsets.empty()) {
    createDataSets(num_elements, num_in, num_out);
  }

  CFATAL(HDF5DB,
         (HDIsets.size() != num_in || HDOsets.size() != num_out),
         "The data dimensionality is different than the one in the "
         "DB")

  writeDataToDataset(HDIsets, inputs, num_elements);
  writeDataToDataset(HDOsets, outputs, num_elements);

  if (storePredicate() && predicate != nullptr) {
    writeVecToDataset(pSet,
                      static_cast<void*>(predicate),
                      num_elements,
                      H5T_NATIVE_HBOOL);
  }

  // Every dataset was appended at the old offset; move the offset only now.
  totalElements += num_elements;
  CALIPER(CALI_MARK_END("STORE_HDF5");)
}
/**
 * @brief Open the HDF5 file backing this database, creating it if needed.
 *
 * An existing file is opened read/write. Otherwise a new file is created and
 * `domain_name` is stored in it as a 1-D H5T_NATIVE_CHAR dataset called
 * "domain_name" (one element per character, no terminator).
 * totalElements starts at 0 and HDType at -1 (no datatype chosen yet).
 *
 * @param path        Forwarded to the FileDB base class.
 * @param domain_name Name written into a newly created file.
 * @param fn          Forwarded to the FileDB base class.
 * @param rId         Forwarded to the FileDB base class.
 * @param predicate   Whether predicates are stored; also selects the
 *                    ".debug.h5" suffix instead of ".h5".
 */
hdf5DB::hdf5DB(std::string path,
               std::string domain_name,
               std::string fn,
               uint64_t rId,
               bool predicate)
    : FileDB(path, fn, predicate ? ".debug.h5" : ".h5", rId),
      predicateStore(predicate)
{
  // Use the error_code overload so a failure is reported through ec and
  // actually reaches checkError() instead of being thrown.
  std::error_code ec;
  const bool exists = fs::exists(this->fn, ec);
  this->checkError(ec);

  if (exists) {
    HFile = H5Fopen(this->fn.c_str(), H5F_ACC_RDWR, H5P_DEFAULT);
    HDF5_ERROR(HFile);
  } else {
    HFile = H5Fcreate(this->fn.c_str(), H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT);
    // Validate the file handle before it is used to create the dataset.
    HDF5_ERROR(HFile);

    hsize_t dims[1] = {domain_name.size()};
    hid_t dataspace_id = H5Screate_simple(1, dims, NULL);
    HDF5_ERROR(dataspace_id);

    hid_t dataset_id = H5Dcreate(HFile,
                                 "domain_name",
                                 H5T_NATIVE_CHAR,
                                 dataspace_id,
                                 H5P_DEFAULT,
                                 H5P_DEFAULT,
                                 H5P_DEFAULT);
    HDF5_ERROR(dataset_id);

    herr_t err = H5Dwrite(dataset_id,
                          H5T_NATIVE_CHAR,
                          H5S_ALL,
                          H5S_ALL,
                          H5P_DEFAULT,
                          domain_name.c_str());
    HDF5_ERROR(err);

    H5Dclose(dataset_id);
    H5Sclose(dataspace_id);
  }

  totalElements = 0;
  HDType = -1;
}
// Destructor: only emits a debug message naming the database type and file.
// It does not close HFile or any dataset handle; the explicit H5Fclose below
// is disabled.
hdf5DB::~hdf5DB()
{
DBG(DB, "Closing File: %s %s", type().c_str(), this->fn.c_str())
// Original author's rationale for not closing the file here:
// HDF5 Automatically closes all opened fds at exit of application.
// NOTE(review): this relies on library behaviour at process exit; confirm it
// still holds if database objects are destroyed well before exit.
// herr_t err = H5Fclose(HFile);
// HDF5_ERROR(err);
}
/**
 * @brief Store single-precision inputs/outputs in the database.
 *
 * If no datatype has been chosen yet (HDType == -1) the database is bound to
 * H5T_NATIVE_FLOAT. A database whose HDType is anything else is rejected via
 * CFATAL before the data is handed to _store().
 *
 * @param num_elements Number of elements in every buffer.
 * @param inputs       One float buffer per input dimension.
 * @param outputs      One float buffer per output dimension.
 * @param predicate    Optional predicate buffer, forwarded to _store().
 */
void hdf5DB::store(size_t num_elements,
                   std::vector<float*>& inputs,
                   std::vector<float*>& outputs,
                   bool* predicate)
{
  // First use of this database decides its datatype.
  const bool typeNotChosenYet = (HDType == -1);
  if (typeNotChosenYet) {
    HDType = H5T_NATIVE_FLOAT;
  }

  CFATAL(HDF5DB,
         HDType != H5T_NATIVE_FLOAT,
         "Database %s initialized to work on 'float' received different "
         "datatypes",
         fn.c_str());

  _store(num_elements, inputs, outputs, predicate);
}
void hdf5DB::store(size_t num_elements,
std::vector<double*>& inputs,
std::vector<double*>& outputs,
bool* predicate)
{
if (HDType == -1) {
HDType = H5T_NATIVE_DOUBLE;
}
_store(num_elements, inputs, outputs, predicate);
}