Skip to content

Commit 844e080

Browse files
committed
fix: deduplicate time values in add_location instead of raising ValueError
Fixes #1478. EK80 data commonly has duplicate timestamps in Platform time dimensions (e.g., from multiple NMEA sentences — GGA, GLL, RMC — sharing the same timestamp). Previously, add_location() raised a ValueError, requiring manual deduplication before use. Now duplicate time values are automatically removed (keeping the first occurrence) with a UserWarning, and interpolation proceeds normally.
- loc_utils.py: check_loc_time_dim_duplicates now returns the deduplicated DataArray
- api.py: use the returned DataArray for downstream interpolation
- test: updated to verify the warning and the successful location addition
1 parent 97a8769 commit 844e080

3 files changed

Lines changed: 31 additions & 19 deletions

File tree

echopype/consolidate/api.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,8 +316,9 @@ def add_location(
316316
datagram_type=datagram_type,
317317
)
318318

319-
# Check if there are duplicates in time_dim_name for this NMEA subset
320-
check_loc_time_dim_duplicates(loc_var, time_dim_name)
319+
# Deduplicate time dimension if needed (e.g. multiple NMEA sentences
320+
# at the same timestamp); required for downstream interpolation.
321+
loc_var = check_loc_time_dim_duplicates(loc_var, time_dim_name)
321322

322323
interp_ds[interp_loc_name] = align_to_ping_time(
323324
loc_var, time_dim_name, ds["ping_time"], "linear"

echopype/consolidate/loc_utils.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import warnings
12
from typing import Union
23

34
import numpy as np
@@ -107,13 +108,24 @@ def check_loc_vars_validity(
107108
logger.warning(output_message)
108109

109110

110-
def check_loc_time_dim_duplicates(da: xr.DataArray, time_dim_name: str) -> None:
111-
"""Check if there are duplicates in time_dim_name"""
112-
if len(np.unique(da[time_dim_name].data)) != len(da[time_dim_name].data):
113-
raise ValueError(
114-
f'Data contains duplicate time values in time_dim_name "{time_dim_name}". '
115-
"Downstream interpolation on the position variables requires unique time values."
111+
def check_loc_time_dim_duplicates(da: xr.DataArray, time_dim_name: str) -> xr.DataArray:
112+
"""Check for and remove duplicates in time_dim_name.
113+
114+
If duplicate time values are found, they are removed (keeping the first
115+
occurrence) and a UserWarning is emitted. The deduplicated DataArray is returned.
116+
"""
117+
time_vals = da[time_dim_name].data
118+
if len(np.unique(time_vals)) != len(time_vals):
119+
n_total = len(time_vals)
120+
n_unique = len(np.unique(time_vals))
121+
warnings.warn(
122+
f'Dropped {n_total - n_unique} duplicate value(s) in "{time_dim_name}".',
123+
UserWarning,
124+
stacklevel=2,
116125
)
126+
_, unique_idx = np.unique(time_vals, return_index=True)
127+
da = da.isel({time_dim_name: np.sort(unique_idx)})
128+
return da
117129

118130

119131
def sel_nmea(

echopype/tests/consolidate/test_add_location.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -234,10 +234,10 @@ def _tests(ds_test, location_type, nmea_sentence=None):
234234
),
235235
],
236236
)
237-
def test_add_location_time_duplicates_value_error(
237+
def test_add_location_time_duplicates_warning(
238238
ek80_path, raw_path, sonar_model, datagram_type, parse_idx, time_dim_name, compute_Sv_kwargs,
239239
):
240-
"""Tests for duplicate time value error in ``add_location``."""
240+
"""Tests that duplicate time values are handled with a warning, not an error."""
241241
# Open raw and compute the Sv dataset
242242
if parse_idx:
243243
ed = ep.open_raw(ek80_path / raw_path, include_idx=True, sonar_model=sonar_model)
@@ -255,16 +255,15 @@ def test_add_location_time_duplicates_value_error(
255255
vals[0] = vals[1]
256256
ed["Platform"] = ed["Platform"].assign_coords({time_dim_name: (da.dims, vals)})
257257

258-
# Check if the expected error is logged
259-
with pytest.raises(ValueError) as exc_info:
260-
# Run add location with duplicated time
261-
ep.consolidate.add_location(ds=ds, echodata=ed, datagram_type=datagram_type)
258+
# Should succeed with a warning instead of raising ValueError
259+
with pytest.warns(UserWarning, match="Dropped 1 duplicate value"):
260+
ds_loc = ep.consolidate.add_location(
261+
ds=ds, echodata=ed, datagram_type=datagram_type
262+
)
262263

263-
# Check if the specific error message is in the logs
264-
assert (
265-
f'Data contains duplicate time values in time_dim_name "{time_dim_name}". '
266-
"Downstream interpolation on the position variables requires unique time values."
267-
) == str(exc_info.value)
264+
# Verify location was successfully added
265+
assert "latitude" in ds_loc
266+
assert "longitude" in ds_loc
268267

269268

270269
@pytest.mark.integration

0 commit comments

Comments
 (0)