Update parsers.py: collect mappings per split group with defaultdict(list) in split_dataframe_by_prefix

cthoyt · cthoyt · commit d439b6063e63 · 2025-09-06T16:36:28.000+02:00
diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
@@ -7,7 +7,7 @@
 import logging as _logging
 import os.path
 import typing
-from collections import ChainMap, Counter
+from collections import ChainMap, Counter, defaultdict
 from pathlib import Path
 from typing import (
     Any,
@@ -1025,7 +1025,7 @@ def split_dataframe_by_prefix(
     df = msdf.df
     meta = msdf.metadata
     split_to_msdf: Dict[str, MappingSetDataFrame] = {}
-    mappings_by_group: dict[SSSOMSplitGroup, list[dict]] = {}
+    mappings_by_group: defaultdict[SSSOMSplitGroup, list[dict]] = defaultdict(list)
 
     # Build up a dict of groups by which mappings should be stored.
     for subject_prefix, object_prefix, relation in itt.product(
@@ -1043,21 +1043,19 @@ def split_dataframe_by_prefix(
         if object_prefix not in msdf.converter.bimap:
             logging.warning(f"{split} - missing object prefix - {object_prefix}")
             continue
-        mappings_by_group[group] = []
 
     # Store mappings by each group of interest.
     for _mapping in df.itertuples(index=False, name="Row"):
         mapping = cast(NamedTuple, _mapping)._asdict()
         subject_curie = msdf.converter.parse_curie(mapping[SUBJECT_ID], strict=True)
         object_curie = msdf.converter.parse_curie(mapping[OBJECT_ID], strict=True)
-        relation_curie = msdf.converter.parse_curie(mapping[PREDICATE_ID], strict=True)
+        relation_tuple = msdf.converter.parse_curie(mapping[PREDICATE_ID], strict=True)
         group = SSSOMSplitGroup(
             subject_curie.prefix,
             object_curie.prefix,
-            relation_curie,
+            relation_tuple,
         )
-        if group in mappings_by_group:
-            mappings_by_group[group].append(mapping)
+        mappings_by_group[group].append(mapping)
 
     # Convert the mappings in each group to a MappingSetDataFrame and index them
     # by a string identifier.