|
29 | 29 | from_sssom_rdf, |
30 | 30 | parse_sssom_table, |
31 | 31 | split_dataframe, |
| 32 | + split_dataframe_by_prefix, |
32 | 33 | ) |
33 | 34 | from sssom.util import MappingSetDataFrame, sort_df_rows_columns |
34 | 35 | from sssom.writers import WRITER_FUNCTIONS, write_table |
@@ -275,7 +276,7 @@ def test_parse_sssom_json(self): |
275 | 276 | ) |
276 | 277 |
|
277 | 278 | def test_split_msdf(self): |
278 | | - """Test splitting a mapping set dataframe""" |
| 279 | + """Test splitting a mapping set dataframe.""" |
279 | 280 | msdf = from_sssom_dataframe(df=self.df, prefix_map=self.df_converter, meta=self.df_meta) |
280 | 281 | splitted = split_dataframe(msdf) |
281 | 282 | self.assertEqual( |
@@ -529,3 +530,60 @@ def test_check_irregular_metadata(self): |
529 | 530 | self.assertTrue(is_irregular_metadata_fail_missing_property_case) |
530 | 531 | self.assertTrue(is_valid_extension) |
531 | 532 | self.assertFalse(is_irregular_metadata_ok_case) |
| 533 | + |
| 534 | + |
class TestSplit(unittest.TestCase):
    """Tests for splitting a mapping set by subject prefix, object prefix, and predicate."""

    def test_split_df(self) -> None:
        """Test ``split_dataframe_by_prefix`` against a small hand-built mapping set.

        The fixture mixes the rows that should be selected with distractors
        (other object prefix, swapped direction, unrelated prefixes, and a
        different predicate on the same prefix pair) so that every filter
        dimension is exercised.
        """
        converter = Converter.from_prefix_map(
            {
                "p1": "https://example.org/p1/",
                "p2": "https://example.org/p2/",
                "p3": "https://example.org/p3/",
                "p4": "https://example.org/p4/",
                "p5": "https://example.org/p5/",
                "p6": "https://example.org/p6/",
                "skos": "http://www.w3.org/2004/02/skos/core#",
                "semapv": "https://w3id.org/semapv/vocab/",
            }
        )
        columns = ["subject_id", "predicate_id", "object_id", "mapping_justification"]

        # The only rows that are p1 -> p2 via skos:exactMatch.
        p1_p2_rows = [
            ("p1:1", "skos:exactMatch", "p2:1", "semapv:ManualMappingCuration"),
            ("p1:2", "skos:exactMatch", "p2:2", "semapv:ManualMappingCuration"),
        ]
        # The only row that is p1 -> p3 via skos:exactMatch.
        p1_p3_rows = [
            ("p1:2", "skos:exactMatch", "p3:2", "semapv:ManualMappingCuration"),
        ]
        rows = [
            *p1_p2_rows,
            *p1_p3_rows,
            # p1 appears as the object here, so a p1-subject split must skip it
            ("p4:1", "skos:exactMatch", "p1:1", "semapv:ManualMappingCuration"),
            # neither prefix is requested anywhere below
            ("p5:1", "skos:broadMatch", "p6:1", "semapv:ManualMappingCuration"),
            # right prefixes, wrong predicate
            ("p1:7", "skos:broadMatch", "p2:7", "semapv:ManualMappingCuration"),
        ]
        msdf = from_sssom_dataframe(pd.DataFrame(rows, columns=columns), converter)

        expected_p1_p2 = pd.DataFrame(p1_p2_rows, columns=columns).values.tolist()
        expected_p1_p3 = pd.DataFrame(p1_p3_rows, columns=columns).values.tolist()

        # an empty list in any position must yield an empty (falsy) result
        empty_argument_cases = [
            ([], ["p2"], ["skos:exactMatch"]),
            (["p1"], ["p2"], []),
            (["p1"], [], ["skos:exactMatch"]),
        ]
        for subjects, objects, relations in empty_argument_cases:
            with self.subTest(subjects=subjects, objects=objects, relations=relations):
                self.assertFalse(split_dataframe_by_prefix(msdf, subjects, objects, relations))

        # prefixes/predicates that are absent from the data must not produce any split
        unknown_argument_cases = [
            (["nope"], ["p2"], ["skos:exactMatch"]),
            (["p1"], ["nope"], ["skos:exactMatch"]),
            (["p1"], ["p2"], ["nope:nope"]),
        ]
        for subjects, objects, relations in unknown_argument_cases:
            with self.subTest(subjects=subjects, objects=objects, relations=relations):
                self.assertFalse(split_dataframe_by_prefix(msdf, subjects, objects, relations))

        # a single subject/object/predicate combination gives exactly one split
        rv = split_dataframe_by_prefix(msdf, ["p1"], ["p2"], ["skos:exactMatch"])
        self.assertEqual(1, len(rv), msg="expected exactly one split for p1/p2/exactMatch")
        self.assertIn("p1_exactmatch_p2", rv)
        self.assertEqual(expected_p1_p2, rv["p1_exactmatch_p2"].df.values.tolist())

        # several object prefixes give one split each, holding only their own rows
        rv = split_dataframe_by_prefix(msdf, ["p1"], ["p2", "p3"], ["skos:exactMatch"])
        self.assertEqual(2, len(rv), msg="expected one split per requested object prefix")
        self.assertIn("p1_exactmatch_p2", rv)
        self.assertIn("p1_exactmatch_p3", rv)
        self.assertEqual(expected_p1_p2, rv["p1_exactmatch_p2"].df.values.tolist())
        self.assertEqual(expected_p1_p3, rv["p1_exactmatch_p3"].df.values.tolist())
0 commit comments