Skip to content

Commit d439b60

Browse files
committed
Update parsers.py
1 parent 788adcc commit d439b60

File tree

1 file changed

+5
-7
lines changed

1 file changed

+5
-7
lines changed

src/sssom/parsers.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
import logging as _logging
88
import os.path
99
import typing
10-
from collections import ChainMap, Counter
10+
from collections import ChainMap, Counter, defaultdict
1111
from pathlib import Path
1212
from typing import (
1313
Any,
@@ -1025,7 +1025,7 @@ def split_dataframe_by_prefix(
10251025
df = msdf.df
10261026
meta = msdf.metadata
10271027
split_to_msdf: Dict[str, MappingSetDataFrame] = {}
1028-
mappings_by_group: dict[SSSOMSplitGroup, list[dict]] = {}
1028+
mappings_by_group: defaultdict[SSSOMSplitGroup, list[dict]] = defaultdict(list)
10291029

10301030
# Build up a dict of groups by which mappings should be stored.
10311031
for subject_prefix, object_prefix, relation in itt.product(
@@ -1043,21 +1043,19 @@ def split_dataframe_by_prefix(
10431043
if object_prefix not in msdf.converter.bimap:
10441044
logging.warning(f"{split} - missing object prefix - {object_prefix}")
10451045
continue
1046-
mappings_by_group[group] = []
10471046

10481047
# Store mappings by each group of interest.
10491048
for _mapping in df.itertuples(index=False, name="Row"):
10501049
mapping = cast(NamedTuple, _mapping)._asdict()
10511050
subject_curie = msdf.converter.parse_curie(mapping[SUBJECT_ID], strict=True)
10521051
object_curie = msdf.converter.parse_curie(mapping[OBJECT_ID], strict=True)
1053-
relation_curie = msdf.converter.parse_curie(mapping[PREDICATE_ID], strict=True)
1052+
relation_tuple = msdf.converter.parse_curie(mapping[PREDICATE_ID], strict=True)
10541053
group = SSSOMSplitGroup(
10551054
subject_curie.prefix,
10561055
object_curie.prefix,
1057-
relation_curie,
1056+
relation_tuple,
10581057
)
1059-
if group in mappings_by_group:
1060-
mappings_by_group[group].append(mapping)
1058+
mappings_by_group[group].append(mapping)
10611059

10621060
# Convert the mappings in each group to a MappingSetDataFrame and index them
10631061
# by a string identifier.

0 commit comments

Comments
 (0)