Skip to content

Commit 02c5ccf

Browse files
committed
Merge branch 'master' into pr/609
2 parents 7db6484 + 3721439 commit 02c5ccf

File tree

2 files changed

+68
-8
lines changed

2 files changed

+68
-8
lines changed

src/sssom/parsers.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -996,16 +996,18 @@ class SSSOMSplitGroup(NamedTuple):
996996
object_prefix: str
997997
relation_curie: ReferenceTuple
998998

999-
def as_identifier(self):
1000-
return "_".join(
1001-
[
1002-
self.subject_prefix.lower(),
1003-
self.relation_curie.identifier.lower(),
1004-
self.object_prefix.lower(),
1005-
]
999+
def as_identifier(self) -> str:
1000+
"""Get a split group key."""
1001+
return _get_split_key(
1002+
self.subject_prefix, self.relation_curie.identifier, self.object_prefix
10061003
)
10071004

10081005

1006+
def _get_split_key(subject_prefix: str, relation_luid: str, object_prefix: str) -> str:
1007+
split = f"{subject_prefix.lower()}_{relation_luid.lower()}_{object_prefix.lower()}"
1008+
return split
1009+
1010+
10091011
def split_dataframe_by_prefix(
10101012
msdf: MappingSetDataFrame,
10111013
subject_prefixes: Iterable[str],

tests/test_parsers.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from_sssom_rdf,
3030
parse_sssom_table,
3131
split_dataframe,
32+
split_dataframe_by_prefix,
3233
)
3334
from sssom.util import MappingSetDataFrame, sort_df_rows_columns
3435
from sssom.writers import WRITER_FUNCTIONS, write_table
@@ -275,7 +276,7 @@ def test_parse_sssom_json(self):
275276
)
276277

277278
def test_split_msdf(self):
278-
"""Test splitting a mapping set dataframe"""
279+
"""Test splitting a mapping set dataframe."""
279280
msdf = from_sssom_dataframe(df=self.df, prefix_map=self.df_converter, meta=self.df_meta)
280281
splitted = split_dataframe(msdf)
281282
self.assertEqual(
@@ -529,3 +530,60 @@ def test_check_irregular_metadata(self):
529530
self.assertTrue(is_irregular_metadata_fail_missing_property_case)
530531
self.assertTrue(is_valid_extension)
531532
self.assertFalse(is_irregular_metadata_ok_case)
533+
534+
535+
class TestSplit(unittest.TestCase):
536+
"""A test case for dataframe utilities."""
537+
538+
def test_split_df(self) -> None:
539+
"""Test splitting a mapping set dataframe by prefix."""
540+
converter = Converter.from_prefix_map(
541+
{
542+
"p1": "https://example.org/p1/",
543+
"p2": "https://example.org/p2/",
544+
"p3": "https://example.org/p3/",
545+
"p4": "https://example.org/p4/",
546+
"p5": "https://example.org/p5/",
547+
"p6": "https://example.org/p6/",
548+
"skos": "http://www.w3.org/2004/02/skos/core#",
549+
"semapv": "https://w3id.org/semapv/vocab/",
550+
}
551+
)
552+
subrows = [
553+
("p1:1", "skos:exactMatch", "p2:1", "semapv:ManualMappingCuration"),
554+
("p1:2", "skos:exactMatch", "p2:2", "semapv:ManualMappingCuration"),
555+
]
556+
rows = [
557+
*subrows,
558+
("p1:2", "skos:exactMatch", "p3:2", "semapv:ManualMappingCuration"),
559+
("p4:1", "skos:exactMatch", "p1:1", "semapv:ManualMappingCuration"),
560+
("p5:1", "skos:broadMatch", "p6:1", "semapv:ManualMappingCuration"),
561+
("p1:7", "skos:broadMatch", "p2:7", "semapv:ManualMappingCuration"),
562+
]
563+
columns = ["subject_id", "predicate_id", "object_id", "mapping_justification"]
564+
df = pd.DataFrame(rows, columns=columns)
565+
msdf = from_sssom_dataframe(df, converter)
566+
567+
# test that if there's ever an empty list, then it returns an empty dict
568+
self.assertFalse(split_dataframe_by_prefix(msdf, [], ["p2"], ["skos:exactMatch"]))
569+
self.assertFalse(split_dataframe_by_prefix(msdf, ["p1"], ["p2"], []))
570+
self.assertFalse(split_dataframe_by_prefix(msdf, ["p1"], [], ["skos:exactMatch"]))
571+
572+
# test that missing prefixes don't result in anything
573+
self.assertFalse(split_dataframe_by_prefix(msdf, ["nope"], ["p2"], ["skos:exactMatch"]))
574+
self.assertFalse(split_dataframe_by_prefix(msdf, ["p1"], ["nope"], ["skos:exactMatch"]))
575+
self.assertFalse(split_dataframe_by_prefix(msdf, ["p1"], ["p2"], ["nope:nope"]))
576+
577+
sdf = pd.DataFrame(subrows, columns=columns)
578+
# test an explicit return with only single entries
579+
rv = split_dataframe_by_prefix(msdf, ["p1"], ["p2"], ["skos:exactMatch"])
580+
self.assertEqual(1, len(rv), msg="nothing was indexed")
581+
self.assertIn("p1_exactmatch_p2", rv)
582+
self.assertEqual(sdf.values.tolist(), rv["p1_exactmatch_p2"].df.values.tolist())
583+
584+
# test an explicit return with multiple entries
585+
rv = split_dataframe_by_prefix(msdf, ["p1"], ["p2", "p3"], ["skos:exactMatch"])
586+
self.assertEqual(2, len(rv), msg="nothing was indexed")
587+
self.assertIn("p1_exactmatch_p2", rv)
588+
self.assertIn("p1_exactmatch_p3", rv)
589+
self.assertEqual(sdf.values.tolist(), rv["p1_exactmatch_p2"].df.values.tolist())

0 commit comments

Comments
 (0)