77import logging as _logging
88import os .path
99import typing
10- from collections import ChainMap , Counter
10+ from collections import ChainMap , Counter , defaultdict
1111from pathlib import Path
1212from typing import (
1313 Any ,
@@ -1025,7 +1025,7 @@ def split_dataframe_by_prefix(
10251025 df = msdf .df
10261026 meta = msdf .metadata
10271027 split_to_msdf : Dict [str , MappingSetDataFrame ] = {}
1028- mappings_by_group : dict [SSSOMSplitGroup , list [dict ]] = {}
1028+ mappings_by_group : defaultdict [SSSOMSplitGroup , list [dict ]] = defaultdict ( list )
10291029
10301030 # Build up a dict of groups by which mappings should be stored.
10311031 for subject_prefix , object_prefix , relation in itt .product (
@@ -1043,21 +1043,19 @@ def split_dataframe_by_prefix(
10431043 if object_prefix not in msdf .converter .bimap :
10441044 logging .warning (f"{ split } - missing object prefix - { object_prefix } " )
10451045 continue
1046- mappings_by_group [group ] = []
10471046
10481047 # Store mappings by each group of interest.
10491048 for _mapping in df .itertuples (index = False , name = "Row" ):
10501049 mapping = cast (NamedTuple , _mapping )._asdict ()
10511050 subject_curie = msdf .converter .parse_curie (mapping [SUBJECT_ID ], strict = True )
10521051 object_curie = msdf .converter .parse_curie (mapping [OBJECT_ID ], strict = True )
1053- relation_curie = msdf .converter .parse_curie (mapping [PREDICATE_ID ], strict = True )
1052+ relation_tuple = msdf .converter .parse_curie (mapping [PREDICATE_ID ], strict = True )
10541053 group = SSSOMSplitGroup (
10551054 subject_curie .prefix ,
10561055 object_curie .prefix ,
1057- relation_curie ,
1056+ relation_tuple ,
10581057 )
1059- if group in mappings_by_group :
1060- mappings_by_group [group ].append (mapping )
1058+ mappings_by_group [group ].append (mapping )
10611059
10621060 # Convert the mappings in each group to a MappingSetDataFrame and index them
10631061 # by a string identifier.
0 commit comments