Skip to content

Commit 96b3ab9

Browse files
authored
Updated to use importlib_resources instead of pkg_resources and prepare for later versions of pandas (#492)
Fixes #491

- [x] Updated to use `importlib_resources` instead of `pkg_resources`.
  - Reason: `pkg_resources` is going to be deprecated.
- [x] Refactored `pandas`-related code to transition smoothly to future versions and handle deprecation warnings:
  - [Pandas PR](https://github.com/pandas-dev/pandas/pull/54710/files#diff-55001624a0932c1b6cee2e6ddb65dea85c1faf0dee84812c0ca0c32916a71438):
    ```
    "Downcasting behavior in `replace` is deprecated and "
    "will be removed in a future version. To retain the old "
    "behavior, explicitly call "
    "`result.infer_objects(copy=False)`. "
    "To opt-in to the future "
    "behavior, set "
    "`pd.set_option('future.no_silent_downcasting', True)`",
    ```
  - `A value is trying to be set on a copy of a slice from a DataFrame`
  - `.apply(max)` => `.apply(np.maximum.reduce)`
  - `UserWarning: Boolean Series key will be reindexed to match DataFrame index.`
1 parent d44267b commit 96b3ab9

File tree

5 files changed

+63
-12
lines changed

5 files changed

+63
-12
lines changed

poetry.lock

Lines changed: 24 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ deprecation = "^2.1.0"
2525
pyyaml = "^6.0.1"
2626
rdflib = ">=6.0.0"
2727
scipy = {version = "*", extras = ["scipy"]}
28+
importlib-resources = "^6.1.1"
2829

2930
[tool.poetry.group.dev.dependencies]
3031
pytest = {version = ">=7.1.2"}

src/sssom/constants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66
from functools import cached_property, lru_cache
77
from typing import Any, Dict, List, Literal, Set
88

9-
import pkg_resources
9+
import importlib_resources
1010
import yaml
1111
from linkml_runtime.utils.schema_as_dict import schema_as_dict
1212
from linkml_runtime.utils.schemaview import SchemaView
1313

1414
HERE = pathlib.Path(__file__).parent.resolve()
1515

16-
SCHEMA_YAML = pkg_resources.resource_filename("sssom_schema", "schema/sssom_schema.yaml")
16+
SCHEMA_YAML = importlib_resources.files("sssom_schema").joinpath("schema/sssom_schema.yaml")
1717
EXTENDED_PREFIX_MAP = HERE / "obo.epm.json"
1818

1919
OWL_EQUIV_CLASS_URI = "http://www.w3.org/2002/07/owl#equivalentClass"

src/sssom/context.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import Mapping, Union
66

77
import curies
8-
import pkg_resources
8+
import importlib_resources
99
from curies import Converter
1010
from rdflib.namespace import is_ncname
1111

@@ -19,8 +19,8 @@
1919
]
2020

2121
SSSOM_BUILT_IN_PREFIXES = ("sssom", "owl", "rdf", "rdfs", "skos", "semapv")
22-
SSSOM_CONTEXT = pkg_resources.resource_filename(
23-
"sssom_schema", "context/sssom_schema.context.jsonld"
22+
SSSOM_CONTEXT = importlib_resources.files("sssom_schema").joinpath(
23+
"context/sssom_schema.context.jsonld"
2424
)
2525

2626

src/sssom/util.py

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,13 @@
8080
KEY_FEATURES = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID, PREDICATE_MODIFIER]
8181
TRIPLES_IDS = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID]
8282

83+
# ! This will be unnecessary when pandas >= 3.0.0 is released
84+
# ! https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.infer_objects.html#
85+
# A value is trying to be set on a copy of a slice from a DataFrame
86+
pd.options.mode.copy_on_write = True
87+
# Get the version of pandas as a tuple of integers
88+
pandas_version = tuple(map(int, pd.__version__.split(".")))
89+
8390

8491
@dataclass
8592
class MappingSetDataFrame:
@@ -151,6 +158,12 @@ def from_mapping_set_document(cls, doc: MappingSetDocument) -> "MappingSetDataFr
151158
df = pd.DataFrame(get_dict_from_mapping(mapping) for mapping in doc.mapping_set.mappings)
152159
meta = _extract_global_metadata(doc)
153160

161+
if pandas_version >= (2, 0, 0):
162+
# For pandas >= 2.0.0, use the 'copy' parameter
163+
df = df.infer_objects(copy=False)
164+
else:
165+
# For pandas < 2.0.0, call 'infer_objects()' without any parameters
166+
df = df.infer_objects()
154167
# remove columns where all values are blank.
155168
df.replace("", np.nan, inplace=True)
156169
df.dropna(axis=1, how="all", inplace=True) # remove columns with all row = 'None'-s.
@@ -160,6 +173,14 @@ def from_mapping_set_document(cls, doc: MappingSetDocument) -> "MappingSetDataFr
160173
slot for slot, slot_metadata in slots.items() if slot_metadata["range"] == "double"
161174
}
162175
non_double_cols = df.loc[:, ~df.columns.isin(slots_with_double_as_range)]
176+
177+
if pandas_version >= (2, 0, 0):
178+
# For pandas >= 2.0.0, use the 'copy' parameter
179+
non_double_cols = non_double_cols.infer_objects(copy=False)
180+
else:
181+
# For pandas < 2.0.0, call 'infer_objects()' without any parameters
182+
non_double_cols = non_double_cols.infer_objects()
183+
163184
non_double_cols.replace(np.nan, "", inplace=True)
164185
df.update(non_double_cols)
165186

@@ -1397,18 +1418,26 @@ def invert_mappings(
13971418
non_predicate_modified_df = df
13981419

13991420
if subject_prefix:
1400-
subject_starts_with_prefix_condition = df[SUBJECT_ID].str.startswith(subject_prefix + ":")
1401-
object_starts_with_prefix_condition = df[OBJECT_ID].str.startswith(subject_prefix + ":")
1421+
# Filter rows where 'SUBJECT_ID' starts with the prefix but 'OBJECT_ID' does not
14021422
prefixed_subjects_df = pd.DataFrame(
14031423
non_predicate_modified_df[
1404-
(subject_starts_with_prefix_condition & ~object_starts_with_prefix_condition)
1424+
(
1425+
non_predicate_modified_df[SUBJECT_ID].str.startswith(subject_prefix + ":")
1426+
& ~non_predicate_modified_df[OBJECT_ID].str.startswith(subject_prefix + ":")
1427+
)
14051428
]
14061429
)
1430+
1431+
# Filter rows where 'SUBJECT_ID' does not start with the prefix but 'OBJECT_ID' does
14071432
non_prefix_subjects_df = pd.DataFrame(
14081433
non_predicate_modified_df[
1409-
(~subject_starts_with_prefix_condition & object_starts_with_prefix_condition)
1434+
(
1435+
~non_predicate_modified_df[SUBJECT_ID].str.startswith(subject_prefix + ":")
1436+
& non_predicate_modified_df[OBJECT_ID].str.startswith(subject_prefix + ":")
1437+
)
14101438
]
14111439
)
1440+
14121441
df_to_invert = non_prefix_subjects_df.loc[
14131442
non_prefix_subjects_df[PREDICATE_ID].isin(list(predicate_invert_map.keys()))
14141443
]

0 commit comments

Comments
 (0)