Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Enhancements
- Added a `HERD`-specific `__repr__` and `_repr_html_` that surface the references as a flattened table, so a `HERD` (especially one read back from a file) no longer appears empty in its default display. @rly [#1510](https://github.com/hdmf-dev/hdmf/pull/1510)
- `HERD.add_ref` now defaults `key` to the value of a scalar string `attribute` when `key` is not provided, removing the redundant argument in the common case. @rly [#1511](https://github.com/hdmf-dev/hdmf/pull/1511)
- `HERD.add_ref` no longer warns when an `entity_uri` is provided for an already-existing `entity_id` and the URI matches the stored one. The entity tables are normalized, so re-passing the same `entity_uri` (common when annotating many objects or files with the same entity) is harmless; a warning is now emitted only when a *different* `entity_uri` is provided, in which case the existing URI is kept. @bendichter [#1513](https://github.com/hdmf-dev/hdmf/pull/1513)

### Fixed
- Fixed `HERD.assert_external_resources_equal` not comparing the `entity_keys` table, which caused HERDs that differed only in their entity-key relationships to be incorrectly reported as equal. @rly [#1500](https://github.com/hdmf-dev/hdmf/pull/1500)
Expand Down
9 changes: 7 additions & 2 deletions src/hdmf/common/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,8 +693,13 @@ def add_ref(self, **kwargs): # noqa: C901
# The entity exists and so we need to check if an entity_key exists
# for this entity and key combination.
check_entity_key = True
if entity_uri is not None:
msg = 'This entity already exists. Ignoring new entity uri'
# The existing entity_uri is always kept. Re-passing the same entity_uri is
# harmless and common when annotating many objects/files with the same entity,
# so only warn when a *different* entity_uri is provided.
if entity_uri is not None and entity_uri != entity.entity_uri:
msg = ("The provided entity_uri '%s' does not match the existing entity_uri '%s' "
"for entity_id '%s'. The existing entity_uri is kept."
% (entity_uri, entity.entity_uri, entity_id))
warn(msg, stacklevel=3)

object_field = self._validate_object(container, attribute, field, file)
Expand Down
42 changes: 38 additions & 4 deletions tests/unit/common/test_resources.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd
import unittest
import warnings
from hdmf.common import DynamicTable, VectorData, get_type_map, get_manager
from hdmf.common import CORE_NAMESPACE as HDMF_COMMON_NAMESPACE
from hdmf import TermSet, TermSetWrapper
Expand Down Expand Up @@ -1274,26 +1275,59 @@ def test_entity_uri_error(self):
key='Mus musculus',
entity_id='NCBI:txid10090')

def test_entity_uri_warning(self):
def test_entity_uri_warning_on_mismatch(self):
# providing a *different* entity_uri for an existing entity_id warns and keeps the existing uri
er = HERD()
data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)],
dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')]))

data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)],
dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')]))

existing_uri = 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090'
er.add_ref(file=HERDManagerContainer(name='file'),
container=data_1,
key='Mus musculus',
entity_id='NCBI:txid10090',
entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090')
entity_uri=existing_uri)
existing_key = er.get_key('Mus musculus')
msg = ("The provided entity_uri 'https://example.com/different' does not match the existing "
"entity_uri '%s' for entity_id 'NCBI:txid10090'. The existing entity_uri is kept."
% existing_uri)
with self.assertWarnsWith(UserWarning, msg):
er.add_ref(file=HERDManagerContainer(name='file'),
container=data_2,
key=existing_key,
entity_id='NCBI:txid10090',
entity_uri='https://example.com/different')
# the existing entity is reused (not duplicated) and its uri is unchanged
self.assertEqual(er.entities.data, [('NCBI:txid10090', existing_uri)])

def test_entity_uri_no_warning_when_same(self):
# re-passing the *same* entity_uri for an existing entity_id does not warn and does not duplicate
er = HERD()
data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)],
dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')]))

data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)],
dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')]))

uri = 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090'
er.add_ref(file=HERDManagerContainer(name='file'),
container=data_1,
key='Mus musculus',
entity_id='NCBI:txid10090',
entity_uri=uri)
existing_key = er.get_key('Mus musculus')
with self.assertWarns(Warning):
with warnings.catch_warnings():
warnings.simplefilter("error") # any warning becomes an error
er.add_ref(file=HERDManagerContainer(name='file'),
container=data_2,
key=existing_key,
entity_id='NCBI:txid10090',
entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090')
entity_uri=uri)
# the entity table is still normalized (a single row), no duplicate added
self.assertEqual(er.entities.data, [('NCBI:txid10090', uri)])

def test_key_without_entity_error(self):
er = HERD()
Expand Down
Loading