2 changes: 2 additions & 0 deletions README.md
@@ -91,6 +91,8 @@ This will generate:
- `*_groupstats.tsv`: Statistical test results (t-statistics, p-values, effect sizes) for each brain region
- `*_groupstats.png`: Heatmap visualizations of statistical results
- `*_groupstats.nii`: 3D volumetric maps of statistical values for visualization in neuroimaging software
- `*_groupavgsegstats.tsv`: Group-averaged segmentation statistics for each contrast group and brain region
- `*_groupavg.nii.gz`: 3D volumetric maps of group-averaged metrics (fieldfrac, density, volume) for each contrast group
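
For example, the group-averaged outputs can be inspected with pandas and nibabel. This is a minimal sketch only; the file names below are placeholders, and the actual entities (seg, desc, contrast, metric) depend on your config:

import pandas as pd
import nibabel as nib

# Per-region group averages for one contrast group (placeholder file name)
avg = pd.read_csv("example_groupavgsegstats.tsv", sep="\t")
print(avg.head())

# Group-averaged volumetric map for the same contrast group (placeholder file name)
img = nib.load("example_groupavg.nii.gz")
print(img.shape)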

# Contributing
We welcome contributions! Please refer to the [contributing guidelines](CONTRIBUTING.md) for more details on how to contribute.
38 changes: 38 additions & 0 deletions spimquant/workflow/Snakefile
@@ -494,6 +494,44 @@ rule all_group_stats:
            contrast_column=config["contrast_column"],
            contrast_value=config["contrast_values"],
        ),
        # Add group-averaged segstats maps by contrast
        expand(
            bids(
                root=root,
                datatype="group",
                seg="{seg}",
                space="{template}",
                desc="{desc}",
                contrast="{contrast_column}+{contrast_value}",
                metric="{stain}+{metric}",
                suffix="groupavg.nii.gz",
            ),
            seg=atlas_segs,
            desc=config["seg_method"],
            template=config["template"],
            stain=stains_for_seg,
            metric=config["seg_metrics"],
            contrast_column=config["contrast_column"],
            contrast_value=config["contrast_values"],
        ),
        expand(
            bids(
                root=root,
                datatype="group",
                seg="{seg}",
                space="{template}",
                desc="{desc}",
                contrast="{contrast_column}+{contrast_value}",
                metric="coloc+{metric}",
                suffix="groupavg.nii.gz",
            ),
            seg=atlas_segs,
            desc=config["seg_method"],
            template=config["template"],
            metric=config["coloc_seg_metrics"],
            contrast_column=config["contrast_column"],
            contrast_value=config["contrast_values"],
        ),


include: "rules/common.smk"
95 changes: 95 additions & 0 deletions spimquant/workflow/rules/groupstats.smk
@@ -349,3 +349,98 @@ rule group_coloc_counts_per_voxel_contrast:
        runtime=10,
    script:
        "../scripts/coloc_per_voxel_template.py"


rule concat_subj_segstats_contrast:
"""Concatenate segstats.tsv files across subjects filtered by contrast
and compute group averages.

This rule collects mergedsegstats.tsv files from all participants,
adds a participant_id column to identify each subject's data,
merges with participant metadata from participants.tsv, filters
to include only rows where the contrast_column matches the
contrast_value, and computes group averages for each atlas region.
"""
input:
segstats_tsvs=lambda wildcards: inputs["spim"].expand(
bids(
root=root,
datatype="micr",
seg=wildcards.seg,
from_=wildcards.template,
desc=wildcards.desc,
suffix="mergedsegstats.tsv",
**inputs["spim"].wildcards,
)
),
participants_tsv=os.path.join(config["bids_dir"], "participants.tsv"),
params:
contrast_column="{contrast_column}",
contrast_value="{contrast_value}",
metric_columns=expand(
"{stain}+{metric}", stain=stains_for_seg, metric=config["seg_metrics"]
),
coloc_metric_columns=expand(
"coloc+{metric}", metric=config["coloc_seg_metrics"]
),
output:
tsv=bids(
root=root,
datatype="group",
seg="{seg}",
from_="{template}",
desc="{desc}",
contrast="{contrast_column}+{contrast_value}",
suffix="groupavgsegstats.tsv",
),
threads: 1
resources:
mem_mb=16000,
runtime=10,
script:
"../scripts/concat_subj_segstats_contrast.py"


rule map_groupavg_segstats_to_template_nii:
"""Map group-averaged segstats to template space as NIfTI files.

This rule takes the group-averaged segstats for a specific contrast
and paints brain regions with the averaged metric values to create
volumetric maps for 3D visualization.
"""
input:
tsv=bids(
root=root,
datatype="group",
seg="{seg}",
from_="{template}",
desc="{desc}",
contrast="{contrast_column}+{contrast_value}",
suffix="groupavgsegstats.tsv",
),
dseg=bids_tpl(
root=root, template="{template}", seg="{seg}", suffix="dseg.nii.gz"
),
label_tsv=bids_tpl(
root=root, template="{template}", seg="{seg}", suffix="dseg.tsv"
),
params:
label_column="index",
feature_column="{metric}",
output:
nii=bids(
root=root,
datatype="group",
seg="{seg}",
space="{template}",
desc="{desc}",
contrast="{contrast_column}+{contrast_value}",
metric="{metric}",
suffix="groupavg.nii.gz",
),
threads: 8
resources:
mem_mb=16000,
runtime=5,
script:
"../scripts/map_tsv_dseg_to_nii.py"
188 changes: 188 additions & 0 deletions spimquant/workflow/scripts/concat_subj_segstats_contrast.py
@@ -0,0 +1,188 @@
"""Concatenate subject-level segstats.tsv files filtered by contrast and compute group averages.

This script concatenates segstats.tsv files from multiple participants, adds a
participant_id column to identify each subject's data, merges with participant
metadata from participants.tsv, filters the data to include only rows where the
specified contrast_column matches the contrast_value, and computes group averages
for each atlas region and metric.

This is a Snakemake script that expects the `snakemake` object to be available,
which is automatically provided when executed as part of a Snakemake workflow.
"""

import os
from pathlib import Path

import pandas as pd


def extract_participant_id(path):
"""Extract participant_id from a BIDS file path.

Parameters
----------
path : str
Path to a BIDS file

Returns
-------
str or None
Participant ID (e.g., 'sub-01') or None if not found
"""
parts = Path(path).parts
for part in parts:
if part.startswith("sub-"):
return part
return None


def load_segstats_with_metadata(segstats_paths, participants_df):
"""Load all segstats files and merge with participant metadata.

Parameters
----------
segstats_paths : list
List of paths to segstats.tsv files
participants_df : pd.DataFrame
DataFrame containing participant metadata

Returns
-------
pd.DataFrame
Combined dataframe with segstats and participant metadata
"""
all_data = []

for path in segstats_paths:
if not os.path.exists(path):
continue

# Extract subject ID from path
subject_id = extract_participant_id(path)

if subject_id is None:
continue

# Load segstats
df = pd.read_csv(path, sep="\t")
df["participant_id"] = subject_id

all_data.append(df)

if not all_data:
raise ValueError(
f"No valid segstats files found. Attempted to load {len(segstats_paths)} "
f"file(s). Check that the files exist and contain valid data with "
f"subject IDs in the file paths (e.g., 'sub-01')."
)

# Combine all segstats
combined = pd.concat(all_data, ignore_index=True)

# Merge with participants metadata
merged = combined.merge(participants_df, on="participant_id", how="left")

return merged


def compute_group_averages(data, metric_columns):
"""Compute group averages for each atlas region and metric.

Parameters
----------
data : pd.DataFrame
Combined dataframe with segstats data
metric_columns : list
List of metric column names to average

Returns
-------
pd.DataFrame
Dataframe with group averages for each region
"""
# Group by region (index and name)
groupby_cols = ["index", "name"]

# Build aggregation dict - only include columns that exist
agg_dict = {}
missing_columns = []
for col in metric_columns:
if col in data.columns:
agg_dict[col] = "mean"
else:
missing_columns.append(col)

# Warn about missing columns to aid debugging
if missing_columns:
print(
f"Warning: The following metric columns were not found in data: {missing_columns}"
)
print(f"Available columns: {list(data.columns)}")

if not agg_dict:
raise ValueError(
f"None of the specified metric columns were found in the data. "
f"Requested: {metric_columns}. Available: {list(data.columns)}"
)

# Compute group averages
group_avg = data.groupby(groupby_cols, as_index=False).agg(agg_dict)

return group_avg


def main():
"""Main function - uses snakemake object provided by Snakemake workflow."""
# Load participants metadata
participants_df = pd.read_csv(snakemake.input.participants_tsv, sep="\t")

# Validate participants.tsv has required columns
if "participant_id" not in participants_df.columns:
raise ValueError("participants.tsv must contain a 'participant_id' column")

# Get contrast filtering parameters
contrast_column = snakemake.params.contrast_column
contrast_value = snakemake.params.contrast_value

# Validate contrast column exists in participants.tsv
if contrast_column not in participants_df.columns:
raise ValueError(
f"Contrast column '{contrast_column}' not found in participants.tsv. "
f"Available columns: {list(participants_df.columns)}"
)

# Load and combine all segstats files
combined_data = load_segstats_with_metadata(
snakemake.input.segstats_tsvs, participants_df
)

# Filter data based on contrast column and value
filtered_data = combined_data[combined_data[contrast_column] == contrast_value]

if filtered_data.empty:
raise ValueError(
f"No data found for {contrast_column}={contrast_value}. "
f"Available values in {contrast_column}: "
f"{combined_data[contrast_column].unique().tolist()}"
)

# Get metric columns to average - handle None values
metric_columns = []
if snakemake.params.metric_columns is not None:
metric_columns.extend(snakemake.params.metric_columns)
if snakemake.params.coloc_metric_columns is not None:
metric_columns.extend(snakemake.params.coloc_metric_columns)

if not metric_columns:
raise ValueError("No metric columns specified for averaging")

# Compute group averages
group_avg = compute_group_averages(filtered_data, metric_columns)

# Save averaged segstats table
group_avg.to_csv(snakemake.output.tsv, sep="\t", index=False)


if __name__ == "__main__":
    main()