From aa7012f82546ca2dcc0508f7a8ea480bc9a63056 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Dec 2025 14:25:58 +0000 Subject: [PATCH 1/5] Initial plan From b91a4db007aa2c87ce3e4744275f437f5ac9f9f0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Dec 2025 14:32:09 +0000 Subject: [PATCH 2/5] Add group-averaged segstats maps by contrast Co-authored-by: akhanf <11492701+akhanf@users.noreply.github.com> --- spimquant/workflow/Snakefile | 38 ++++ spimquant/workflow/rules/groupstats.smk | 95 ++++++++++ .../scripts/concat_subj_segstats_contrast.py | 165 ++++++++++++++++++ 3 files changed, 298 insertions(+) create mode 100644 spimquant/workflow/scripts/concat_subj_segstats_contrast.py diff --git a/spimquant/workflow/Snakefile b/spimquant/workflow/Snakefile index aa21b82..70aed1b 100644 --- a/spimquant/workflow/Snakefile +++ b/spimquant/workflow/Snakefile @@ -494,6 +494,44 @@ rule all_group_stats: contrast_column=config["contrast_column"], contrast_value=config["contrast_values"], ), + # Add group-averaged segstats maps by contrast + expand( + bids( + root=root, + datatype="group", + seg="{seg}", + space="{template}", + desc="{desc}", + contrast="{contrast_column}+{contrast_value}", + metric="{stain}+{metric}", + suffix="groupavg.nii.gz", + ), + seg=atlas_segs, + desc=config["seg_method"], + template=config["template"], + stain=stains_for_seg, + metric=config["seg_metrics"], + contrast_column=config["contrast_column"], + contrast_value=config["contrast_values"], + ), + expand( + bids( + root=root, + datatype="group", + seg="{seg}", + space="{template}", + desc="{desc}", + contrast="{contrast_column}+{contrast_value}", + metric="coloc+{metric}", + suffix="groupavg.nii.gz", + ), + seg=atlas_segs, + desc=config["seg_method"], + template=config["template"], + metric=config["coloc_seg_metrics"], + contrast_column=config["contrast_column"], + contrast_value=config["contrast_values"], + ), include: "rules/common.smk" diff --git a/spimquant/workflow/rules/groupstats.smk b/spimquant/workflow/rules/groupstats.smk index 01d160e..e7a7753 100644 --- a/spimquant/workflow/rules/groupstats.smk +++ b/spimquant/workflow/rules/groupstats.smk @@ -349,3 +349,98 @@ rule group_coloc_counts_per_voxel_contrast: runtime=10, script: "../scripts/coloc_per_voxel_template.py" + + +rule concat_subj_segstats_contrast: + """Concatenate segstats.tsv files across subjects filtered by contrast + and compute group averages. + + This rule collects mergedsegstats.tsv files from all participants, + adds a participant_id column to identify each subject's data, + merges with participant metadata from participants.tsv, filters + to include only rows where the contrast_column matches the + contrast_value, and computes group averages for each atlas region. 
+ """ + input: + segstats_tsvs=lambda wildcards: inputs["spim"].expand( + bids( + root=root, + datatype="micr", + seg=wildcards.seg, + from_=wildcards.template, + desc=wildcards.desc, + suffix="mergedsegstats.tsv", + **inputs["spim"].wildcards, + ) + ), + participants_tsv=os.path.join(config["bids_dir"], "participants.tsv"), + params: + contrast_column="{contrast_column}", + contrast_value="{contrast_value}", + metric_columns=expand( + "{stain}+{metric}", stain=stains_for_seg, metric=config["seg_metrics"] + ), + coloc_metric_columns=expand( + "coloc+{metric}", metric=config["coloc_seg_metrics"] + ), + output: + tsv=bids( + root=root, + datatype="group", + seg="{seg}", + from_="{template}", + desc="{desc}", + contrast="{contrast_column}+{contrast_value}", + suffix="groupavgsegstats.tsv", + ), + threads: 1 + resources: + mem_mb=16000, + runtime=10, + script: + "../scripts/concat_subj_segstats_contrast.py" + + +rule map_groupavg_segstats_to_template_nii: + """Map group-averaged segstats to template space as NIfTI files. + + This rule takes the group-averaged segstats for a specific contrast + and paints brain regions with the averaged metric values to create + volumetric maps for 3D visualization. + """ + input: + tsv=bids( + root=root, + datatype="group", + seg="{seg}", + from_="{template}", + desc="{desc}", + contrast="{contrast_column}+{contrast_value}", + suffix="groupavgsegstats.tsv", + ), + dseg=bids_tpl( + root=root, template="{template}", seg="{seg}", suffix="dseg.nii.gz" + ), + label_tsv=bids_tpl( + root=root, template="{template}", seg="{seg}", suffix="dseg.tsv" + ), + params: + label_column="index", + feature_column="{metric}", + output: + nii=bids( + root=root, + datatype="group", + seg="{seg}", + space="{template}", + desc="{desc}", + contrast="{contrast_column}+{contrast_value}", + metric="{metric}", + suffix="groupavg.nii.gz", + ), + threads: 8 + resources: + mem_mb=16000, + runtime=5, + script: + "../scripts/map_tsv_dseg_to_nii.py" diff --git a/spimquant/workflow/scripts/concat_subj_segstats_contrast.py b/spimquant/workflow/scripts/concat_subj_segstats_contrast.py new file mode 100644 index 0000000..81cbd6c --- /dev/null +++ b/spimquant/workflow/scripts/concat_subj_segstats_contrast.py @@ -0,0 +1,165 @@ +"""Concatenate subject-level segstats.tsv files filtered by contrast and compute group averages. + +This script concatenates segstats.tsv files from multiple participants, adds a +participant_id column to identify each subject's data, merges with participant +metadata from participants.tsv, filters the data to include only rows where the +specified contrast_column matches the contrast_value, and computes group averages +for each atlas region and metric. + +This is a Snakemake script that expects the `snakemake` object to be available, +which is automatically provided when executed as part of a Snakemake workflow. +""" + +import os +from pathlib import Path + +import pandas as pd +import numpy as np + + +def extract_participant_id(path): + """Extract participant_id from a BIDS file path. + + Parameters + ---------- + path : str + Path to a BIDS file + + Returns + ------- + str or None + Participant ID (e.g., 'sub-01') or None if not found + """ + parts = Path(path).parts + for part in parts: + if part.startswith("sub-"): + return part + return None + + +def load_segstats_with_metadata(segstats_paths, participants_df): + """Load all segstats files and merge with participant metadata. 
+ + Parameters + ---------- + segstats_paths : list + List of paths to segstats.tsv files + participants_df : pd.DataFrame + DataFrame containing participant metadata + + Returns + ------- + pd.DataFrame + Combined dataframe with segstats and participant metadata + """ + all_data = [] + + for path in segstats_paths: + if not os.path.exists(path): + continue + + # Extract subject ID from path + subject_id = extract_participant_id(path) + + if subject_id is None: + continue + + # Load segstats + df = pd.read_csv(path, sep="\t") + df["participant_id"] = subject_id + + all_data.append(df) + + if not all_data: + raise ValueError( + f"No valid segstats files found. Attempted to load {len(segstats_paths)} " + f"file(s). Check that the files exist and contain valid data with " + f"subject IDs in the file paths (e.g., 'sub-01')." + ) + + # Combine all segstats + combined = pd.concat(all_data, ignore_index=True) + + # Merge with participants metadata + merged = combined.merge(participants_df, on="participant_id", how="left") + + return merged + + +def compute_group_averages(data, metric_columns): + """Compute group averages for each atlas region and metric. + + Parameters + ---------- + data : pd.DataFrame + Combined dataframe with segstats data + metric_columns : list + List of metric column names to average + + Returns + ------- + pd.DataFrame + Dataframe with group averages for each region + """ + # Group by region (index and name) + groupby_cols = ["index", "name"] + + # Build aggregation dict + agg_dict = {} + for col in metric_columns: + if col in data.columns: + agg_dict[col] = "mean" + + # Compute group averages + group_avg = data.groupby(groupby_cols, as_index=False).agg(agg_dict) + + return group_avg + + +def main(): + """Main function - uses snakemake object provided by Snakemake workflow.""" + # Load participants metadata + participants_df = pd.read_csv(snakemake.input.participants_tsv, sep="\t") + + # Validate participants.tsv has required columns + if "participant_id" not in participants_df.columns: + raise ValueError("participants.tsv must contain a 'participant_id' column") + + # Get contrast filtering parameters + contrast_column = snakemake.params.contrast_column + contrast_value = snakemake.params.contrast_value + + # Validate contrast column exists in participants.tsv + if contrast_column not in participants_df.columns: + raise ValueError( + f"Contrast column '{contrast_column}' not found in participants.tsv. " + f"Available columns: {list(participants_df.columns)}" + ) + + # Load and combine all segstats files + combined_data = load_segstats_with_metadata( + snakemake.input.segstats_tsvs, participants_df + ) + + # Filter data based on contrast column and value + filtered_data = combined_data[combined_data[contrast_column] == contrast_value] + + if filtered_data.empty: + raise ValueError( + f"No data found for {contrast_column}={contrast_value}. 
" + f"Available values in {contrast_column}: " + f"{combined_data[contrast_column].unique().tolist()}" + ) + + # Get metric columns to average + metric_columns = snakemake.params.metric_columns + snakemake.params.coloc_metric_columns + + # Compute group averages + group_avg = compute_group_averages(filtered_data, metric_columns) + + # Save averaged segstats table + group_avg.to_csv(snakemake.output.tsv, sep="\t", index=False) + + +if __name__ == "__main__": + main() From 7eaeee28c45d260c34f1b0eee10be5b1e2dc8940 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Dec 2025 14:34:07 +0000 Subject: [PATCH 3/5] Update README to document new group-averaged segstats outputs Co-authored-by: akhanf <11492701+akhanf@users.noreply.github.com> --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index a736879..b0189d2 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,8 @@ This will generate: - `*_groupstats.tsv`: Statistical test results (t-statistics, p-values, effect sizes) for each brain region - `*_groupstats.png`: Heatmap visualizations of statistical results - `*_groupstats.nii`: 3D volumetric maps of statistical values for visualization in neuroimaging software +- `*_groupavgsegstats.tsv`: Group-averaged segmentation statistics for each contrast group and brain region +- `*_groupavg.nii.gz`: 3D volumetric maps of group-averaged metrics (fieldfrac, density, volume) for each contrast group # Contributing We welcome contributions! Please refer to the [contributing guidelines](CONTRIBUTING.md) for more details on how to contribute. From 18c43016b3b50757445f4731f5c6e8af3140ec99 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Dec 2025 14:37:07 +0000 Subject: [PATCH 4/5] Add validation for metric columns to prevent errors Co-authored-by: akhanf <11492701+akhanf@users.noreply.github.com> --- .../scripts/concat_subj_segstats_contrast.py | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/spimquant/workflow/scripts/concat_subj_segstats_contrast.py b/spimquant/workflow/scripts/concat_subj_segstats_contrast.py index 81cbd6c..dab300b 100644 --- a/spimquant/workflow/scripts/concat_subj_segstats_contrast.py +++ b/spimquant/workflow/scripts/concat_subj_segstats_contrast.py @@ -104,11 +104,25 @@ def compute_group_averages(data, metric_columns): # Group by region (index and name) groupby_cols = ["index", "name"] - # Build aggregation dict + # Build aggregation dict - only include columns that exist agg_dict = {} + missing_columns = [] for col in metric_columns: if col in data.columns: agg_dict[col] = "mean" + else: + missing_columns.append(col) + + # Warn about missing columns to aid debugging + if missing_columns: + print(f"Warning: The following metric columns were not found in data: {missing_columns}") + print(f"Available columns: {list(data.columns)}") + + if not agg_dict: + raise ValueError( + f"None of the specified metric columns were found in the data. " + f"Requested: {metric_columns}. 
Available: {list(data.columns)}" + ) # Compute group averages group_avg = data.groupby(groupby_cols, as_index=False).agg(agg_dict) @@ -151,8 +165,15 @@ def main(): f"{combined_data[contrast_column].unique().tolist()}" ) - # Get metric columns to average - metric_columns = snakemake.params.metric_columns + snakemake.params.coloc_metric_columns + # Get metric columns to average - handle None values + metric_columns = [] + if snakemake.params.metric_columns is not None: + metric_columns.extend(snakemake.params.metric_columns) + if snakemake.params.coloc_metric_columns is not None: + metric_columns.extend(snakemake.params.coloc_metric_columns) + + if not metric_columns: + raise ValueError("No metric columns specified for averaging") # Compute group averages group_avg = compute_group_averages(filtered_data, metric_columns) From f0e2ee0d1ff909240c5c6b51757dbcdc43a7eb35 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Thu, 18 Dec 2025 20:50:19 -0500 Subject: [PATCH 5/5] fmt --- .../scripts/concat_subj_segstats_contrast.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/spimquant/workflow/scripts/concat_subj_segstats_contrast.py b/spimquant/workflow/scripts/concat_subj_segstats_contrast.py index dab300b..0dfa50d 100644 --- a/spimquant/workflow/scripts/concat_subj_segstats_contrast.py +++ b/spimquant/workflow/scripts/concat_subj_segstats_contrast.py @@ -103,7 +103,7 @@ def compute_group_averages(data, metric_columns): """ # Group by region (index and name) groupby_cols = ["index", "name"] - + # Build aggregation dict - only include columns that exist agg_dict = {} missing_columns = [] @@ -112,21 +112,23 @@ def compute_group_averages(data, metric_columns): agg_dict[col] = "mean" else: missing_columns.append(col) - + # Warn about missing columns to aid debugging if missing_columns: - print(f"Warning: The following metric columns were not found in data: {missing_columns}") + print( + f"Warning: The following metric columns were not found in data: {missing_columns}" + ) print(f"Available columns: {list(data.columns)}") - + if not agg_dict: raise ValueError( f"None of the specified metric columns were found in the data. " f"Requested: {metric_columns}. Available: {list(data.columns)}" ) - + # Compute group averages group_avg = data.groupby(groupby_cols, as_index=False).agg(agg_dict) - + return group_avg @@ -171,7 +173,7 @@ def main(): metric_columns.extend(snakemake.params.metric_columns) if snakemake.params.coloc_metric_columns is not None: metric_columns.extend(snakemake.params.coloc_metric_columns) - + if not metric_columns: raise ValueError("No metric columns specified for averaging")
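
Note on the 3D maps: rule map_groupavg_segstats_to_template_nii reuses the existing map_tsv_dseg_to_nii.py script, which is not part of this diff. Under the assumption that it paints each atlas label in the template dseg with the value from the chosen feature column (function and argument names below are hypothetical, not the script's actual API), that step could look roughly like:

    import nibabel as nib
    import numpy as np
    import pandas as pd

    def paint_dseg(dseg_path, tsv_path, feature_column, out_path, label_column="index"):
        """Replace each atlas label in a dseg volume with a per-region feature value."""
        dseg_img = nib.load(dseg_path)
        labels = np.asanyarray(dseg_img.dataobj).astype(int)

        stats = pd.read_csv(tsv_path, sep="\t")
        # Look-up table from region label to its group-averaged metric value
        lut = dict(zip(stats[label_column].astype(int), stats[feature_column].astype(float)))

        painted = np.zeros(labels.shape, dtype=np.float32)
        for label, value in lut.items():
            painted[labels == label] = value  # voxels outside listed regions stay 0

        nib.save(nib.Nifti1Image(painted, dseg_img.affine), out_path)

For the new outputs, the feature column would be one of the "{stain}+{metric}" or "coloc+{metric}" columns of groupavgsegstats.tsv, and the result is written to the groupavg.nii.gz path declared in the rule.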