diff --git a/.gitignore b/.gitignore index 5ceaf47..0914b9c 100644 --- a/.gitignore +++ b/.gitignore @@ -115,7 +115,14 @@ venv.bak/ .mypy_cache/ # Results from assay-dev -./fish_morphology_code/processing/structure_organization/local_organization/train/data/ -./fish_morphology_code/processing/structure_organization/local_organization/train/patches/ -./fish_morphology_code/processing/structure_organization/output/ -./fish_morphology_code/processing/structure_organization/results/ +*.csv +fish_morphology_code/processing/structure_organization/local_organization/best_model +fish_morphology_code/processing/structure_organization/local_organization/train/data +fish_morphology_code/processing/structure_organization/local_organization/train/models +fish_morphology_code/processing/structure_organization/local_organization/train/patches +fish_morphology_code/processing/structure_organization/global_alignment/jinja2 +fish_morphology_code/processing/structure_organization/tools/jinja2 +fish_morphology_code/processing/structure_organization/notebooks/figs +fish_morphology_code/processing/structure_organization/output* +fish_morphology_code/processing/structure_organization/results* + diff --git a/fish_morphology_code/processing/structure_organization/README.md b/fish_morphology_code/processing/structure_organization/README.md index 1481262..57e988e 100644 --- a/fish_morphology_code/processing/structure_organization/README.md +++ b/fish_morphology_code/processing/structure_organization/README.md @@ -42,14 +42,15 @@ The `.pth` file corresponding to the best model should be manually copied to the The script `inference/inference.py` uses the best model selected by the user to perform classification on new images. The classification is done by using a sliding window through the image. The patch represented in the sliding window is classified 4 times. Once raw and three times with randomizaed rotation and image flipping. The probabilities of each class are averaged together between the 4 outputs. In addition, this script also uses the Allen Cell Structure Segmenter [1] to identify background regions in the input data and mask them out from the final classification maps. The input data is organized in the CSV file `database/database.csv`. Each row in this file corresponds to an input z-stack for which the inference will be performed on. Results are saved in the folder `structure_organization/output/` as the z-stacks are processed one at the time. An additional CSV file `database/database_cell.csv` is used to load the single cell manual segmentation. For each input z-stack an output z-stack is produced. The slices of the output z-stack are: -1. Highest mean intensity slice form the original input z-stack. -2. Probaility map for class Diffuse/others -3. Probaility map for class Fibers -4. Probaility map for class Disorganized puncta -5. Probaility map for class Organized Puncta -6. Probaility map for class Organized Z-disks -7. Final classification map based on the highest probability -8. Single cell manual segmentation +1. Highest mean intensity slice of the original input z-stack. +2. Sum projection of the original input z-stack +3. Probaility map for class Diffuse/others +4. Probaility map for class Fibers +5. Probaility map for class Disorganized puncta +6. Probaility map for class Organized Puncta +7. Probaility map for class Organized Z-disks +8. Final classification map based on the highest probability +9. Single cell manual segmentation The classes in the 7th slice are encoded as follow: @@ -107,10 +108,14 @@ This script will output the following metrics: * `Prob_Disorganized_Puncta` * `Prob_Organized_Puncta` * `Prob_Organized_ZDisks` +* `Intensity_Mean` * `Intensity_Median` * `Intensity_Integrated` +* `Intensity_SumIntegrated` +* `Intensity_Mean_BackSub` * `Intensity_Median_BackSub` * `Intensity_Integrated_BackSub` +* `Intensity_SumIntegrated_BackSub` * `Background_Value` This metrics are appended as new columns in the CSV file `output/fov_0.csv`. diff --git a/fish_morphology_code/processing/structure_organization/database/README.md b/fish_morphology_code/processing/structure_organization/database/README.md new file mode 100644 index 0000000..cbbaba3 --- /dev/null +++ b/fish_morphology_code/processing/structure_organization/database/README.md @@ -0,0 +1,3 @@ +Column: database_path, Description: Table with per FOV information +Column: cell_database_path, Description: Table with per cell information +Column: database_live_fixed_path, Description: Table with per FOV information from live fixed dataset diff --git a/fish_morphology_code/processing/structure_organization/database_old/README.md b/fish_morphology_code/processing/structure_organization/database_old/README.md new file mode 100644 index 0000000..cbbaba3 --- /dev/null +++ b/fish_morphology_code/processing/structure_organization/database_old/README.md @@ -0,0 +1,3 @@ +Column: database_path, Description: Table with per FOV information +Column: cell_database_path, Description: Table with per cell information +Column: database_live_fixed_path, Description: Table with per FOV information from live fixed dataset diff --git a/fish_morphology_code/processing/structure_organization/global_alignment/alignment.j2 b/fish_morphology_code/processing/structure_organization/global_alignment/alignment.j2 index cb662e1..13b47a6 100644 --- a/fish_morphology_code/processing/structure_organization/global_alignment/alignment.j2 +++ b/fish_morphology_code/processing/structure_organization/global_alignment/alignment.j2 @@ -3,8 +3,8 @@ #SBATCH --job-name=align_fov_{{ index }} #SBATCH --partition aics_cpu_general #SBATCH --mem 8G -#SBATCH --output /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/jinja2/log/align_fov_{{ index }}.out +#SBATCH --output /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/fish_morphology_code/fish_morphology_code/processing/structure_organization/global_alignment/jinja2/log/align_fov_{{ index }}.out rfolder = script_folder = '' -srun /home/matheus.viana/anaconda3/envs/parallel/bin/python /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/global_alignment/alignment.py --fov {{ index }} \ No newline at end of file +srun /home/matheus.viana/anaconda3/envs/parallel/bin/python /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/fish_morphology_code/fish_morphology_code/processing/structure_organization/global_alignment/alignment.py --fov {{ index }} \ No newline at end of file diff --git a/fish_morphology_code/processing/structure_organization/global_alignment/alignment.py b/fish_morphology_code/processing/structure_organization/global_alignment/alignment.py index 5047d9f..2aa64d9 100644 --- a/fish_morphology_code/processing/structure_organization/global_alignment/alignment.py +++ b/fish_morphology_code/processing/structure_organization/global_alignment/alignment.py @@ -223,7 +223,7 @@ def process_fov(FOVId, df_fov): ) parser.add_argument("--fov", help="Full path to FOV", required=True) args = vars(parser.parse_args()) - FOVId = int(args["fov"]) + FOVId = args["fov"] # Downlaod the datasets from Quilt if there is no local copy ds_folder = "../database/" diff --git a/fish_morphology_code/processing/structure_organization/global_alignment/alignment_jinja2.py b/fish_morphology_code/processing/structure_organization/global_alignment/alignment_jinja2.py index 38f01ce..6a2892d 100644 --- a/fish_morphology_code/processing/structure_organization/global_alignment/alignment_jinja2.py +++ b/fish_morphology_code/processing/structure_organization/global_alignment/alignment_jinja2.py @@ -26,10 +26,7 @@ "alignment.j2" ) -rfolder = ( - script_folder -) = "/allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/jinja2/" - +rfolder = "jinja2/" for f in ["", "scripts", "log"]: if not os.path.exists(os.path.join(rfolder, f)): os.makedirs(os.path.join(rfolder, f)) diff --git a/fish_morphology_code/processing/structure_organization/local_organization/inference/inference.py b/fish_morphology_code/processing/structure_organization/local_organization/inference/inference.py index 41aa40b..70ccd2c 100644 --- a/fish_morphology_code/processing/structure_organization/local_organization/inference/inference.py +++ b/fish_morphology_code/processing/structure_organization/local_organization/inference/inference.py @@ -10,6 +10,7 @@ from scipy.ndimage.morphology import distance_transform_edt as edt from aicssegmentation.core.vessel import vesselness3D from aicssegmentation.core.pre_processing_utils import edge_preserving_smoothing_3d +from aicsimageio import AICSImage from cardio_CNN_classifier import cardio_cnn @@ -30,8 +31,8 @@ # Download from Quilt print("No weights were found locally. Downloading from Quilt...") pkg = Package.browse( - "matheus/assay_dev_actn2_classifier", "s3://allencell-internal-quilt" - ).fetch("../best_model") + "aics/integrated_transcriptomics_structural_organization_hipsc_cm", "s3://allencell" + )["actn2_pattern_ml_classifier_model"].fetch("../best_model") metadata = pd.read_csv(os.path.join("..", "best_model", "metadata.csv")) model_weights_path = os.path.join("..", "best_model", metadata.model_path[0]) elif len(model_weights_path) > 1: @@ -71,10 +72,14 @@ def segment_image(struct_img): metadata = pd.read_csv(os.path.join(ds_folder, "metadata.csv")) +# Read fov database df_fov = pd.read_csv(os.path.join(ds_folder, metadata.database_path[0]), index_col=0) +# Read single cell database df_cell = pd.read_csv(os.path.join(ds_folder, metadata.cell_database_path[0])) +print(f'FOV shape: {df_fov.shape}, Cellshape: {df_cell.shape}') + # Run all FOVs for index in df_fov.index: @@ -86,7 +91,8 @@ def segment_image(struct_img): nuc_ch = 1 # Channel of nuclar dye str_ch = 5 # Channel of EGFP-alpha-actinin-2 - img = skio.imread(filename) + # img = skio.imread(filename) + img = AICSImage(filename).data.squeeze() print(img.shape) nuc = img[int(nuc_ch)] img = img[int(str_ch)] @@ -94,16 +100,22 @@ def segment_image(struct_img): # Load single cell segmentation df_cell_sub = df_cell.loc[df_cell.RawFileName == df_fov.RawFileName[index]] mask_name = df_cell_sub.MaskFileName.values[0] - single_cell_mask = skio.imread(mask_name)[-1] + # single_cell_mask = skio.imread(mask_name)[-1] + single_cell_mask = AICSImage(mask_name).data.squeeze()[-1] # Structure segmentation nslices = img.shape[0] zprofile = img.mean(axis=-1).mean(axis=-1) slice_number = np.argmax(zprofile) + # Sum projection + img_sum = img.sum(axis=0) + + # Sub-stack at the center img = img[slice_number - 3 : slice_number + 4] img_binary = segment_image(img) + # Slice of highest intensity data = img[3] data_binary = img_binary[3] @@ -123,6 +135,7 @@ def segment_image(struct_img): data_lines = data_skell.copy() data_lines[data_junc > 3] = 0 + # Background detection bkrad = 64 data_mask = data_binary.copy() data_mask = data_mask.astype(np.uint16) @@ -173,6 +186,7 @@ def segment_image(struct_img): data_output = np.vstack( [ data.reshape(-1, *dim), + img_sum.reshape(-1, *dim), data_probs, data_classification.reshape(-1, *dim), single_cell_mask.reshape(-1, *dim), diff --git a/fish_morphology_code/processing/structure_organization/local_organization/train/model_training.py b/fish_morphology_code/processing/structure_organization/local_organization/train/model_training.py index 46384cb..e18ece5 100644 --- a/fish_morphology_code/processing/structure_organization/local_organization/train/model_training.py +++ b/fish_morphology_code/processing/structure_organization/local_organization/train/model_training.py @@ -16,8 +16,8 @@ # Load data from quilt p = Package.browse( - "matheus/assay_dev_classifier_train", "s3://allencell-internal-quilt" -).fetch("data/") + "aics/integrated_transcriptomics_structural_organization_hipsc_cm", "s3://allencell" +)["actn2_pattern_ml_classifier_train"].fetch("data/") manifest = pd.read_csv("data/metadata.csv", index_col=0) diff --git a/fish_morphology_code/processing/structure_organization/tools/assay-dev-fish.md b/fish_morphology_code/processing/structure_organization/tools/assay-dev-fish.md index aea80fb..6255e0b 100644 --- a/fish_morphology_code/processing/structure_organization/tools/assay-dev-fish.md +++ b/fish_morphology_code/processing/structure_organization/tools/assay-dev-fish.md @@ -17,10 +17,14 @@ - `Prob_Disorganized_Puncta`: Average probability of a pixel inside the cell to be classified as disorganized puncta - `Prob_Organized_Puncta`: Average probability of a pixel inside the cell to be classified as organized puncta - `Prob_Organized_ZDisks`: Average probability of a pixel inside the cell to be classified as organized z disks +- `IntensityMean`: Mean of GFP signal in cell mask - `IntensityMedian`: Median of GFP signal in cell mask - `IntensityIntegrated`: Integrated GFP signal in cell mask -- `IntensityMedianBkgSub`: Median of GFP signal in cell mask with background subtracted (10% percentile +- `IntensitySumIntegrated`: Integrated GFP signal in the sum projection of the cell mask +- `IntensityMeanBkgSub`: Mean of GFP signal in cell mask with background subtracted (10% percentile) +- `IntensityMedianBkgSub`: Median of GFP signal in cell mask with background subtracted (10% percentile) - `IntensityIntegratedBkgSub`: Integrated GFP signal in cell mask with background subtracted (10% percentile +- `IntensitySumIntegratedBkgSub`: Integrated GFP signal in the sum projection of the cell mask with background subtracted (nSlices x 10% percentile) - `Maximum_Coefficient_Variation`: Maximum value of the coefficient of variation obtained from correlation plots - `Peak_Height`: High of the highest peak in the correlation plots - `Peak_Distance`: Distance in pixels in which the maximum of the highest peak occurs diff --git a/fish_morphology_code/processing/structure_organization/tools/features.j2 b/fish_morphology_code/processing/structure_organization/tools/features.j2 index 0dff748..55d852c 100644 --- a/fish_morphology_code/processing/structure_organization/tools/features.j2 +++ b/fish_morphology_code/processing/structure_organization/tools/features.j2 @@ -3,6 +3,6 @@ #SBATCH --job-name=features_fov_{{ index }} #SBATCH --partition aics_cpu_general #SBATCH --mem 8G -#SBATCH --output /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/jinja2/log/features_fov_{{ index }}.out +#SBATCH --output /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/fish_morphology_code/fish_morphology_code/processing/structure_organization/tools/jinja2/log/features_fov_{{ index }}.out -srun /home/matheus.viana/anaconda3/envs/parallel/bin/python /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/tools/features.py --fov {{ index }} \ No newline at end of file +srun /home/matheus.viana/anaconda3/envs/parallel/bin/python /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/fish_morphology_code/fish_morphology_code/processing/structure_organization/tools/features.py --fov {{ index }} \ No newline at end of file diff --git a/fish_morphology_code/processing/structure_organization/tools/features.py b/fish_morphology_code/processing/structure_organization/tools/features.py index 61a3528..762f0b1 100644 --- a/fish_morphology_code/processing/structure_organization/tools/features.py +++ b/fish_morphology_code/processing/structure_organization/tools/features.py @@ -16,10 +16,11 @@ def process_fov(FOVId, df_fov): """ # Channel numbers - ch_raw = 0 - ch_prs = slice(1, 6) - ch_cla = 6 - ch_msk = 7 + ch_raw = 0 # highest intensity slice + ch_sum = 1 # sum projection + ch_prs = slice(2, 7) # probabilities + ch_cla = 7 # classification maps + ch_msk = 8 # cell segmentation masks source = "../output/" @@ -48,19 +49,32 @@ def process_fov(FOVId, df_fov): probs = data[ch_prs, mask > 0].mean(axis=1) - # Intensity based features + # Intensity based features on highest intensity slice intensities = data[ch_raw, mask > 0].flatten() - background_intensity = np.percentile(data[ch_raw], 10) + background_intensity = np.percentile(data[ch_raw], 1) + + avg_intensity = np.mean(intensities) med_intensity = np.percentile(intensities, 50) int_intensity = np.sum(intensities) + avg_intensity_bs = np.mean(intensities - background_intensity) + med_intensity_bs = np.percentile(intensities - background_intensity, 50) int_intensity_bs = np.sum(intensities - background_intensity) + # Intensity based features on sum projection + nslices = int(df_fov.at[CellId,'NSlices']) + + sum_intensities = data[ch_sum, mask > 0].flatten() + + int_sum_intensity = np.sum(sum_intensities) + + int_sum_intensity_bs = np.sum(sum_intensities - nslices*background_intensity) + # Dict for features features = { "Total_Area": total_area, @@ -75,11 +89,15 @@ def process_fov(FOVId, df_fov): "Prob_Disorganized_Puncta": probs[2], "Prob_Organized_Puncta": probs[3], "Prob_Organized_ZDisks": probs[4], + "Intensity_Mean": avg_intensity, "Intensity_Median": med_intensity, "Intensity_Integrated": int_intensity, + "Intensity_SumIntegrated": int_sum_intensity, + "Intensity_Mean_BackSub": avg_intensity_bs, "Intensity_Median_BackSub": med_intensity_bs, "Intensity_Integrated_BackSub": int_intensity_bs, - "Background_Value": background_intensity, + "Intensity_SumIntegrated_BackSub": int_sum_intensity_bs, + "Background_Value": background_intensity } for key, value in features.items(): @@ -102,7 +120,7 @@ def process_fov(FOVId, df_fov): ) parser.add_argument("--fov", help="Full path to FOV", required=True) args = vars(parser.parse_args()) - FOVId = int(args["fov"]) + FOVId = args["fov"] # Downlaod the datasets from Quilt if there is no local copy ds_folder = "../database/" diff --git a/fish_morphology_code/processing/structure_organization/tools/features_jinja2.py b/fish_morphology_code/processing/structure_organization/tools/features_jinja2.py index aa48ebc..b4a20e6 100644 --- a/fish_morphology_code/processing/structure_organization/tools/features_jinja2.py +++ b/fish_morphology_code/processing/structure_organization/tools/features_jinja2.py @@ -23,13 +23,12 @@ # Load jinja template # -j2template = jinja2.Environment(loader=jinja2.FileSystemLoader(".")).get_template( - "features.j2" -) +j2template = jinja2.Environment(loader=jinja2.FileSystemLoader(".")).get_template("features.j2") -rfolder = ( - script_folder -) = "/allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/jinja2/" +rfolder = "jinja2/" +for f in ["", "scripts", "log"]: + if not os.path.exists(os.path.join(rfolder, f)): + os.makedirs(os.path.join(rfolder, f)) for index in df_fov.index: diff --git a/fish_morphology_code/processing/structure_organization/tools/merge_and_upload.py b/fish_morphology_code/processing/structure_organization/tools/merge_and_upload.py index c0521dd..8ae3563 100644 --- a/fish_morphology_code/processing/structure_organization/tools/merge_and_upload.py +++ b/fish_morphology_code/processing/structure_organization/tools/merge_and_upload.py @@ -20,7 +20,7 @@ # FOVs that could not be read from the server # We shall come back to this files in the future. -fovs_with_read_problems = [40, 135, 462, 2000] +fovs_with_read_problems = []#[40, 135, 462, 2000] # Gathering results df = [] @@ -64,10 +64,10 @@ if not os.path.exists("../results/"): os.makedirs("../results/") -# Checking expected shape of the dataframe -assert df.shape == (5161, 33) +df.set_index(["CellId"]).to_csv("../results/AssayDevFishAnalsysis2020-live_fixed.csv") -df.set_index(["CellId"]).to_csv("../results/AssayDevFishAnalsysis2020.csv") +# Checking expected shape of the dataframe +# assert df.shape == (5161, 33) # ------------------------------------------------------------------------------------------------- # Upload results to Quilt @@ -177,6 +177,12 @@ "description": "Average probability of a pixel inside the cell to be classified as organized z disks", } }, + { + "Intensity_Mean": { + "name": "IntensityMean", + "description": "Mean of GFP signal in cell mask", + } + }, { "Intensity_Median": { "name": "IntensityMedian", @@ -189,10 +195,22 @@ "description": "Integrated GFP signal in cell mask", } }, + { + "Intensity_SumIntegrated": { + "name": "IntensitySumIntegrated", + "description": "Integrated GFP signal in the sum projection of the cell mask", + } + }, + { + "Intensity_Mean_BackSub": { + "name": "IntensityMeanBkgSub", + "description": "Mean of GFP signal in cell mask with background subtracted (10% percentile)", + } + }, { "Intensity_Median_BackSub": { "name": "IntensityMedianBkgSub", - "description": "Median of GFP signal in cell mask with background subtracted (10% percentile", + "description": "Median of GFP signal in cell mask with background subtracted (10% percentile)", } }, { @@ -201,6 +219,12 @@ "description": "Integrated GFP signal in cell mask with background subtracted (10% percentile", } }, + { + "Intensity_SumIntegrated_BackSub": { + "name": "IntensitySumIntegratedBkgSub", + "description": "Integrated GFP signal in the sum projection of the cell mask with background subtracted (nSlices x 10% percentile)", + } + }, { "Maximum_Coefficient_Variation": { "name": None, @@ -255,24 +279,26 @@ ) # Checking expected shape of the dataframe -assert df.shape == (5161, 25) +# assert df.shape == (5161, 25) # Save a hand off version for the Modeling team -df.to_csv("../results/AssayDevFishAnalsysis-Handoff.csv") - -# Upload to Quilt -ds = Dataset( - dataset="../results/AssayDevFishAnalsysis-Handoff.csv", - name="assay_dev_fish_analysis", - package_owner="matheus", - readme_path="assay-dev-fish.md", -) - -# Set metadata and path columns -ds.set_metadata_columns(["CellId"]) -ds.set_path_columns(["result_image_path"]) - -# Send to Quilt -pkg = ds.distribute( - push_uri="s3://allencell-internal-quilt", message="Fish dataset by assay-dev" -) +df.to_csv("../results/AssayDevFishAnalsysis-Handoff-live_fixed.csv") + +if False: + + # Upload to Quilt + ds = Dataset( + dataset="../results/AssayDevFishAnalsysis-Handoff.csv", + name="assay_dev_fish_analysis", + package_owner="matheus", + readme_path="assay-dev-fish.md", + ) + + # Set metadata and path columns + ds.set_metadata_columns(["CellId"]) + ds.set_path_columns(["result_image_path"]) + + # Send to Quilt + pkg = ds.distribute( + push_uri="s3://allencell-internal-quilt", message="Fish dataset by assay-dev" + )