diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d609e65..4dce26a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -17,6 +17,10 @@ repos:
         args: [--fix=lf]
       - id: requirements-txt-fixer
       - id: trailing-whitespace
+  - repo: https://github.com/snakemake/snakefmt
+    rev: v0.11.2
+    hooks:
+      - id: snakefmt
   - repo: https://github.com/charliermarsh/ruff-pre-commit
     rev: v0.6.3
     hooks:
diff --git a/photon_mosaic/workflow/Snakefile b/photon_mosaic/workflow/Snakefile
index 201bddb..a97b607 100644
--- a/photon_mosaic/workflow/Snakefile
+++ b/photon_mosaic/workflow/Snakefile
@@ -18,14 +18,20 @@ from photon_mosaic import log_cuda_availability
 import logging
 
 # Configure logging based on config settings
-log_level = logging.DEBUG if config.get("logging", {}).get("snakemake_verbose", False) else logging.INFO
+log_level = (
+    logging.DEBUG
+    if config.get("logging", {}).get("snakemake_verbose", False)
+    else logging.INFO
+)
 logging.basicConfig(level=log_level)
 logger = logging.getLogger("snakemake.workflow")
 
+
 # CUDA availability check on workflow start
 onstart:
     log_cuda_availability()
 
+
 raw_data_base = Path(config["raw_data_base"]).resolve()
 processed_data_base = Path(config["processed_data_base"]).resolve()
 slurm_config = config.get("slurm", {})
@@ -49,7 +55,9 @@ discoverer = DatasetDiscoverer(
     exclude_datasets=config["dataset_discovery"].get("exclude_datasets"),
     exclude_sessions=config["dataset_discovery"].get("exclude_sessions"),
     tiff_patterns=config["dataset_discovery"].get("tiff_patterns"),
-    neuroblueprint_format=config["dataset_discovery"].get("neuroblueprint_format", False),
+    neuroblueprint_format=config["dataset_discovery"].get(
+        "neuroblueprint_format", False
+    ),
 )
 
 discoverer.discover()
@@ -66,7 +74,9 @@ preproc_targets = [
         / f"{output_pattern}{tiff_name}"
     )
     for i, dataset_name in enumerate(discoverer.transformed_datasets)
-    for session_idx, tiff_list in discoverer.tiff_files[discoverer.original_datasets[i]].items()
+    for session_idx, tiff_list in discoverer.tiff_files[
+        discoverer.original_datasets[i]
+    ].items()
     for tiff_name in tiff_list
 ]
 
@@ -83,17 +93,21 @@ suite2p_targets = [
         / fname
     )
     for i, dataset_name in enumerate(discoverer.transformed_datasets)
-    for session_idx, tiff_list in discoverer.tiff_files[discoverer.original_datasets[i]].items()
+    for session_idx, tiff_list in discoverer.tiff_files[
+        discoverer.original_datasets[i]
+    ].items()
     for fname in ["F.npy", "data.bin"]
     if tiff_list  # Only create targets for sessions that have files
 ]
 
 logger.info(f"Suite2p targets: {suite2p_targets}")
 
+
 include: "preprocessing.smk"
 include: "suite2p.smk"
 
+
 rule all:
     input:
         preproc_targets,
-        suite2p_targets
+        suite2p_targets,
diff --git a/photon_mosaic/workflow/preprocessing.smk b/photon_mosaic/workflow/preprocessing.smk
index 53e4363..78062f5 100644
--- a/photon_mosaic/workflow/preprocessing.smk
+++ b/photon_mosaic/workflow/preprocessing.smk
@@ -25,36 +25,61 @@ import os
 # Configure SLURM resources if enabled
 slurm_config = config.get("slurm", {}) if config.get("use_slurm") else {}
 
+
 # Preprocessing rule
 rule preprocessing:
     input:
-        img=lambda wildcards: cross_platform_path(raw_data_base / discoverer.original_datasets[discoverer.transformed_datasets.index(wildcards.subject_name)])
+        img=lambda wildcards: cross_platform_path(
+            raw_data_base
+            / discoverer.original_datasets[
+                discoverer.transformed_datasets.index(wildcards.subject_name)
+            ]
+        ),
     output:
         processed=cross_platform_path(
             Path(processed_data_base).resolve()
             / "{subject_name}"
             / "{session_name}"
             / "funcimg"
-            / (f"{output_pattern}"+ "{tiff}")
-        )
+            / (f"{output_pattern}" + "{tiff}")
+        ),
     params:
-        dataset_folder=lambda wildcards: cross_platform_path(raw_data_base / discoverer.original_datasets[discoverer.transformed_datasets.index(wildcards.subject_name)]),
+        dataset_folder=lambda wildcards: cross_platform_path(
+            raw_data_base
+            / discoverer.original_datasets[
+                discoverer.transformed_datasets.index(wildcards.subject_name)
+            ]
+        ),
         output_folder=lambda wildcards: cross_platform_path(
             Path(processed_data_base).resolve()
             / wildcards.subject_name
             / wildcards.session_name
             / "funcimg"
         ),
-        ses_idx=lambda wildcards: int(wildcards.session_name.split("_")[0].replace("ses-", "")),
+        ses_idx=lambda wildcards: int(
+            wildcards.session_name.split("_")[0].replace("ses-", "")
+        ),
     wildcard_constraints:
-        tiff="|".join(sorted(discoverer.tiff_files_flat)) if discoverer.tiff_files_flat else "dummy",
+        tiff=(
+            "|".join(sorted(discoverer.tiff_files_flat))
+            if discoverer.tiff_files_flat
+            else "dummy"
+        ),
         subject_name="|".join(discoverer.transformed_datasets),
-        session_name="|".join([discoverer.get_session_name(i, session_idx) for i in range(len(discoverer.transformed_datasets))
-            for session_idx in discoverer.tiff_files[discoverer.original_datasets[i]].keys()]),
+        session_name="|".join(
+            [
+                discoverer.get_session_name(i, session_idx)
+                for i in range(len(discoverer.transformed_datasets))
+                for session_idx in discoverer.tiff_files[
+                    discoverer.original_datasets[i]
+                ].keys()
+            ]
+        ),
     resources:
         **(slurm_config if config.get("use_slurm") else {}),
     run:
         from photon_mosaic.rules.preprocessing import run_preprocessing
+
         run_preprocessing(
             Path(params.output_folder),
             config["preprocessing"],
diff --git a/photon_mosaic/workflow/suite2p.smk b/photon_mosaic/workflow/suite2p.smk
index bc22dd6..8df18dd 100644
--- a/photon_mosaic/workflow/suite2p.smk
+++ b/photon_mosaic/workflow/suite2p.smk
@@ -17,6 +17,7 @@ Output: Suite2p analysis results (F.npy, data.bin) in suite2p/plane0/ directory
 import re
 from photon_mosaic.snakemake_utils import cross_platform_path
 
+
 rule suite2p:
     input:
         tiffs=lambda wildcards: [
@@ -27,9 +28,11 @@ rule suite2p:
             / "funcimg"
             / f"{output_pattern}{tiff_name}"
         )
-        for tiff_name in discoverer.tiff_files[discoverer.original_datasets[discoverer.transformed_datasets.index(wildcards.subject_name)]][
-            int(wildcards.session_name.split("_")[0].replace("ses-", ""))
-        ]
+        for tiff_name in discoverer.tiff_files[
+            discoverer.original_datasets[
+                discoverer.transformed_datasets.index(wildcards.subject_name)
+            ]
+        ][int(wildcards.session_name.split("_")[0].replace("ses-", ""))]
     ],
     output:
         F=cross_platform_path(
@@ -49,7 +52,7 @@ rule suite2p:
             / "suite2p"
             / "plane0"
             / "data.bin"
-        )
+        ),
     params:
         dataset_folder=lambda wildcards: cross_platform_path(
             Path(processed_data_base).resolve()
@@ -59,8 +62,15 @@ rule suite2p:
         ),
     wildcard_constraints:
         subject_name="|".join(discoverer.transformed_datasets),
-        session_name="|".join([discoverer.get_session_name(i, session_idx) for i in range(len(discoverer.transformed_datasets))
-            for session_idx in discoverer.tiff_files[discoverer.original_datasets[i]].keys()]),
+        session_name="|".join(
+            [
+                discoverer.get_session_name(i, session_idx)
+                for i in range(len(discoverer.transformed_datasets))
+                for session_idx in discoverer.tiff_files[
+                    discoverer.original_datasets[i]
+                ].keys()
+            ]
+        ),
     resources:
         **(slurm_config if config.get("use_slurm") else {}),
     run: