Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/spikeinterface/extractors/mclustextractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ def __init__(self, folder_path, sampling_frequency, sampling_frequency_raw=None)
ext = None

for e in ext_list:
files = Path(folder_path).glob(f"*.{e}")
# `glob` returns a lazy generator, which is always truthy; materialize it so the
# `if files` check actually reflects whether any file with this extension exists.
files = sorted(Path(folder_path).glob(f"*.{e}"))
if files:
ext = e
break
Expand All @@ -61,6 +63,13 @@ def __init__(self, folder_path, sampling_frequency, sampling_frequency_raw=None)
line = f.readline()
reading_header = not line.decode("utf-8").startswith(end_header_str)
times = np.fromfile(f, dtype=dataformat)
# MClust 3.x writes big-endian uint64 timestamps into the plain `.t` suffix, but
# `dataformat` is uint32 for any non-`64` extension. Reading 64-bit values as 32-bit
# yields interleaved (high word, low word) pairs, and the high word is zero whenever the
# timestamp fits in 32 bits. Dropping the zero words therefore lets both 32- and 64-bit
# `.t` files load, which matches the community reference loader. Caveat: the filter also
# discards any genuine zero-valued timestamp and cannot reassemble timestamps that need
# more than 32 bits.
if ext.startswith("t") and dataformat == ">u4":
times = times[times > 0]
if ext.startswith("t"):
times = times / 10000
else:
Expand Down
29 changes: 29 additions & 0 deletions src/spikeinterface/extractors/tests/test_mclustextractors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import importlib.util
import shutil

import pytest

from spikeinterface.core import download_dataset
from spikeinterface.extractors.extractor_classes import read_mclust


@pytest.mark.skipif(
    any(importlib.util.find_spec(module_name) is None for module_name in ("pooch", "datalad")),
    reason="Either pooch or datalad is not installed",
)
def test_read_mclust(tmp_path):
    """Load a single unit from the gin `mclust` dataset through `read_mclust`.

    The mclust dataset on gin stores big-endian uint64 timestamps in the plain `.t`
    suffix (MClust 3.x). This previously made read_mclust return zero units: the
    extension was always detected as `t64` and the uint64 data was misread as uint32
    (GH-4602).

    The dataset has two `.t` files whose names both end in `_1`, so they parse to the
    same unit id. Only one of them (the smaller TT06 file) is copied into a scratch
    folder under `tmp_path`, so read_mclust sees a single unit.
    """
    unit_file_name = "M040-2020-04-28-TT06_1.t"
    dataset_dir = download_dataset(remote_path="mclust")

    # Scratch folder that holds only the one `.t` file under test.
    isolated_dir = tmp_path / "mclust_single"
    isolated_dir.mkdir()
    shutil.copy(dataset_dir / unit_file_name, isolated_dir / unit_file_name)

    sorting = read_mclust(isolated_dir, sampling_frequency=32000.0)

    assert sorting.get_num_units() == 1
    spike_train = sorting.get_unit_spike_train(unit_id=1)
    assert len(spike_train) == 5988
Loading