Skip to content

Commit 9587e93

Browse files
committed
updates
1 parent a13eb46 commit 9587e93

File tree

1 file changed

+7
-16
lines changed

1 file changed

+7
-16
lines changed

src/hangar/dataloaders/grouper.py

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
import numpy as np
22

33
from ..arrayset import ArraysetDataReader
4+
from ..records.hashmachine import array_hash_digest
45

56
from collections import defaultdict
6-
import hashlib
7-
from typing import Sequence, Union, Iterable, NamedTuple
8-
import struct
7+
from typing import Sequence, Union, Iterable, NamedTuple, Tuple
98

109

1110
# -------------------------- typehints ---------------------------------------
@@ -21,21 +20,14 @@
2120
# ------------------------------------------------------------------------------
2221

2322

24-
def _calculate_hash_digest(data: np.ndarray) -> str:
25-
hasher = hashlib.blake2b(data, digest_size=20)
26-
hasher.update(struct.pack(f'<{len(data.shape)}QB', *data.shape, data.dtype.num))
27-
digest = hasher.hexdigest()
28-
return digest
29-
30-
3123
class FakeNumpyKeyDict(object):
3224
def __init__(self, group_spec_samples, group_spec_value, group_digest_spec):
3325
self._group_spec_samples = group_spec_samples
3426
self._group_spec_value = group_spec_value
3527
self._group_digest_spec = group_digest_spec
3628

3729
def __getitem__(self, key: np.ndarray) -> ArraysetSampleNames:
38-
digest = _calculate_hash_digest(key)
30+
digest = array_hash_digest(key)
3931
spec = self._group_digest_spec[digest]
4032
samples = self._group_spec_samples[spec]
4133
return samples
@@ -53,7 +45,7 @@ def __len__(self) -> int:
5345
return len(self._group_digest_spec)
5446

5547
def __contains__(self, key: np.ndarray) -> bool:
56-
digest = _calculate_hash_digest(key)
48+
digest = array_hash_digest(key)
5749
res = True if digest in self._group_digest_spec else False
5850
return res
5951

@@ -69,7 +61,7 @@ def values(self) -> Iterable[ArraysetSampleNames]:
6961
for spec in self._group_digest_spec.values():
7062
yield self._group_spec_samples[spec]
7163

72-
def items(self) -> Iterable[ArraysetSampleNames]:
64+
def items(self) -> Iterable[Tuple[np.ndarray, ArraysetSampleNames]]:
7365
for spec in self._group_digest_spec.values():
7466
yield (self._group_spec_value[spec], self._group_spec_samples[spec])
7567

@@ -81,11 +73,10 @@ def __repr__(self):
8173
def _repr_pretty_(self, p, cycle):
8274
res = f'Mapping: Group Data Value -> Sample Name \n'
8375
for k, v in self.items():
84-
res += f'\n {k} :: {v}'
76+
res += f'\n {k} :: {v} \n'
8577
p.text(res)
8678

8779

88-
8980
# ---------------------------- MAIN METHOD ------------------------------------
9081

9182

@@ -112,7 +103,7 @@ def _setup(self):
112103
for spec, names in self._group_spec_samples.items():
113104
data = self.__arrayset._fs[spec.backend].read_data(spec)
114105
self._group_spec_value[spec] = data
115-
digest = _calculate_hash_digest(data)
106+
digest = array_hash_digest(data)
116107
self._group_digest_spec[digest] = spec
117108

118109
@property

0 commit comments

Comments
 (0)