Skip to content

Commit d83d398

Browse files
authored
Fixes #305 and #306 (#308)
* Fixes validate #306 * fix for #305 * blank values for prefixes should not be added. * undo refactor and change logic * changed metadata check logic * changed logic to get pref from meta * formatted * casting issue * mypy fix * mypy fix * edits after discussion with Nico on PR
1 parent 5490672 commit d83d398

File tree

2 files changed

+45
-9
lines changed

2 files changed

+45
-9
lines changed

sssom/context.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
# EXTERNAL_CONTEXT_PATH = HERE / "sssom.external.context.jsonld"
1818

1919
SSSOM_URI_PREFIX = "https://w3id.org/sssom/"
20-
SSSOM_BUILT_IN_PREFIXES = ["sssom", "owl", "rdf", "rdfs", "skos"]
20+
SSSOM_BUILT_IN_PREFIXES = ["sssom", "owl", "rdf", "rdfs", "skos", "semapv"]
2121
DEFAULT_MAPPING_SET_ID = f"{SSSOM_URI_PREFIX}mappings/{uuid.uuid4()}"
2222
DEFAULT_LICENSE = f"{SSSOM_URI_PREFIX}license/unspecified"
2323

sssom/util.py

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,12 @@
7373
SUBJECT_LABEL,
7474
SUBJECT_SOURCE,
7575
)
76-
from .context import SSSOM_URI_PREFIX, get_default_metadata, get_jsonld_context
76+
from .context import (
77+
SSSOM_BUILT_IN_PREFIXES,
78+
SSSOM_URI_PREFIX,
79+
get_default_metadata,
80+
get_jsonld_context,
81+
)
7782
from .sssom_document import MappingSetDocument
7883
from .typehints import Metadata, MetadataType, PrefixMap
7984

@@ -145,15 +150,22 @@ def __str__(self) -> str: # noqa:D105
145150

146151
def clean_prefix_map(self) -> None:
147152
"""Remove unused prefixes from the internal prefix map based on the internal dataframe."""
148-
prefixes_in_map = get_prefixes_used_in_table(self.df)
153+
all_prefixes = []
154+
prefixes_in_table = get_prefixes_used_in_table(self.df)
155+
if self.metadata:
156+
prefixes_in_metadata = get_prefixes_used_in_metadata(self.metadata)
157+
all_prefixes = list(set(prefixes_in_table + prefixes_in_metadata))
158+
else:
159+
all_prefixes = prefixes_in_table
160+
149161
new_prefixes: PrefixMap = dict()
150162
missing_prefixes = []
151-
for prefix in prefixes_in_map:
163+
for prefix in all_prefixes:
152164
if prefix in self.prefix_map:
153165
new_prefixes[prefix] = self.prefix_map[prefix]
154166
else:
155167
logging.warning(
156-
f"{prefix} is used in the data frame but does not exist in prefix map"
168+
f"{prefix} is used in the SSSOM mapping set but it does not exist in the prefix map"
157169
)
158170
missing_prefixes.append(prefix)
159171
if missing_prefixes:
@@ -1086,11 +1098,34 @@ def curie_from_uri(uri: str, prefix_map: Mapping[str, str]) -> str:
10861098

10871099
def get_prefixes_used_in_table(df: pd.DataFrame) -> List[str]:
10881100
"""Get a list of prefixes used in CURIEs in key feature columns in a dataframe."""
1089-
prefixes = []
1101+
prefixes = SSSOM_BUILT_IN_PREFIXES
10901102
if not df.empty:
1091-
for col in KEY_FEATURES:
1092-
for v in df[col].values:
1093-
prefixes.append(get_prefix_from_curie(v))
1103+
for col in ENTITY_REFERENCE_SLOTS:
1104+
if col in df.columns:
1105+
for v in df[col].values:
1106+
pref = get_prefix_from_curie(str(v))
1107+
if pref != "" and not None:
1108+
prefixes.append(pref)
1109+
return list(set(prefixes))
1110+
1111+
1112+
def get_prefixes_used_in_metadata(meta: MetadataType) -> List[str]:
1113+
"""Get a list of prefixes used in CURIEs in the metadata."""
1114+
prefixes = SSSOM_BUILT_IN_PREFIXES
1115+
if meta:
1116+
for v in meta.values():
1117+
if type(v) is list:
1118+
prefixes.extend(
1119+
[
1120+
get_prefix_from_curie(x)
1121+
for x in v
1122+
if get_prefix_from_curie(x) != ""
1123+
]
1124+
)
1125+
else:
1126+
pref = get_prefix_from_curie(str(v))
1127+
if pref != "" and not None:
1128+
prefixes.append(pref)
10941129
return list(set(prefixes))
10951130

10961131

@@ -1374,6 +1409,7 @@ def get_all_prefixes(msdf: MappingSetDataFrame) -> list:
13741409
[
13751410
get_prefix_from_curie(s)
13761411
for s in list(set(msdf.df[slot].to_list())) # type: ignore
1412+
if get_prefix_from_curie(s) != ""
13771413
]
13781414
)
13791415
)

0 commit comments

Comments
 (0)