15 changes: 11 additions & 4 deletions .gitignore
@@ -115,7 +115,14 @@ venv.bak/
.mypy_cache/

# Results from assay-dev
./fish_morphology_code/processing/structure_organization/local_organization/train/data/
./fish_morphology_code/processing/structure_organization/local_organization/train/patches/
./fish_morphology_code/processing/structure_organization/output/
./fish_morphology_code/processing/structure_organization/results/
*.csv
fish_morphology_code/processing/structure_organization/local_organization/best_model
fish_morphology_code/processing/structure_organization/local_organization/train/data
fish_morphology_code/processing/structure_organization/local_organization/train/models
fish_morphology_code/processing/structure_organization/local_organization/train/patches
fish_morphology_code/processing/structure_organization/global_alignment/jinja2
fish_morphology_code/processing/structure_organization/tools/jinja2
fish_morphology_code/processing/structure_organization/notebooks/figs
fish_morphology_code/processing/structure_organization/output*
fish_morphology_code/processing/structure_organization/results*

21 changes: 13 additions & 8 deletions fish_morphology_code/processing/structure_organization/README.md
@@ -42,14 +42,15 @@ The `.pth` file corresponding to the best model should be manually copied to the

The script `inference/inference.py` uses the best model selected by the user to classify new images. Classification is done with a sliding window over the image: each window patch is classified 4 times, once on the raw patch and three times with randomized rotation and image flipping, and the class probabilities of the 4 outputs are averaged (see the sketch after the list below). In addition, this script uses the Allen Cell Structure Segmenter [1] to identify background regions in the input data and mask them out of the final classification maps. The input data is organized in the CSV file `database/database.csv`; each row in this file corresponds to an input z-stack on which inference will be performed. Results are saved in the folder `structure_organization/output/` as the z-stacks are processed one at a time. An additional CSV file `database/database_cell.csv` is used to load the single cell manual segmentation. For each input z-stack an output z-stack is produced. The slices of the output z-stack are:

1. Highest mean intensity slice form the original input z-stack.
2. Probaility map for class Diffuse/others
3. Probaility map for class Fibers
4. Probaility map for class Disorganized puncta
5. Probaility map for class Organized Puncta
6. Probaility map for class Organized Z-disks
7. Final classification map based on the highest probability
8. Single cell manual segmentation
1. Highest mean intensity slice of the original input z-stack.
2. Sum projection of the original input z-stack
3. Probability map for class Diffuse/others
4. Probability map for class Fibers
5. Probability map for class Disorganized puncta
6. Probability map for class Organized Puncta
7. Probability map for class Organized Z-disks
8. Final classification map based on the highest probability
9. Single cell manual segmentation
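
For orientation, the per-patch averaging described above can be sketched as follows. This is a minimal illustration only; `model` and `predict_probabilities` are placeholder names, not the actual `cardio_cnn` API used by `inference/inference.py`.

```python
import numpy as np

def classify_patch_with_averaging(model, patch, n_augmented=3):
    """Classify one sliding-window patch: once raw, then on randomly
    rotated/flipped copies, and average the class probabilities."""
    views = [patch]
    for _ in range(n_augmented):
        view = np.rot90(patch, k=np.random.randint(1, 4))        # random 90/180/270 degree rotation
        if np.random.rand() < 0.5:
            view = np.flip(view, axis=np.random.randint(0, 2))   # random flip along one axis
        views.append(view)
    probs = np.stack([model.predict_probabilities(v) for v in views])
    return probs.mean(axis=0)  # averaged class probabilities, shape (n_classes,)
```

In the real script the averaged probabilities are assembled over the full sliding-window grid, and regions flagged as background by the Allen Cell Structure Segmenter are masked out of the final classification map.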

The classes in the final classification map are encoded as follows:

@@ -107,10 +108,14 @@ This script will output the following metrics:
* `Prob_Disorganized_Puncta`
* `Prob_Organized_Puncta`
* `Prob_Organized_ZDisks`
* `Intensity_Mean`
* `Intensity_Median`
* `Intensity_Integrated`
* `Intensity_SumIntegrated`
* `Intensity_Mean_BackSub`
* `Intensity_Median_BackSub`
* `Intensity_Integrated_BackSub`
* `Intensity_SumIntegrated_BackSub`
* `Background_Value`

These metrics are appended as new columns to the CSV file `output/fov_0.csv`.
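
A rough sketch of that append step (the file name comes from this README; the values and the single-row index are illustrative, and the actual logic lives in `tools/features.py`):

```python
import pandas as pd

df = pd.read_csv("output/fov_0.csv", index_col=0)

# Illustrative feature values; in practice there is one row per segmented cell.
features = {"Intensity_Mean": 1523.7, "Background_Value": 96.0}
for key, value in features.items():
    df.loc[0, key] = value  # new columns are created on assignment

df.to_csv("output/fov_0.csv")
```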
@@ -0,0 +1,3 @@
Column: database_path, Description: Table with per FOV information
Column: cell_database_path, Description: Table with per cell information
Column: database_live_fixed_path, Description: Table with per FOV information from live fixed dataset
@@ -0,0 +1,3 @@
Column: database_path, Description: Table with per FOV information
Column: cell_database_path, Description: Table with per cell information
Column: database_live_fixed_path, Description: Table with per FOV information from live fixed dataset
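
These column descriptions refer to the `metadata.csv` that the processing scripts read in order to locate the tables. A minimal sketch of that lookup, following the pattern used elsewhere in this diff (the `../database/` folder is the layout assumed by the scripts):

```python
import os
import pandas as pd

ds_folder = "../database/"
metadata = pd.read_csv(os.path.join(ds_folder, "metadata.csv"))

# Per-FOV and per-cell tables referenced by the metadata columns above.
df_fov = pd.read_csv(os.path.join(ds_folder, metadata.database_path[0]), index_col=0)
df_cell = pd.read_csv(os.path.join(ds_folder, metadata.cell_database_path[0]))
```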
@@ -3,8 +3,8 @@
#SBATCH --job-name=align_fov_{{ index }}
#SBATCH --partition aics_cpu_general
#SBATCH --mem 8G
#SBATCH --output /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/jinja2/log/align_fov_{{ index }}.out
#SBATCH --output /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/fish_morphology_code/fish_morphology_code/processing/structure_organization/global_alignment/jinja2/log/align_fov_{{ index }}.out

rfolder = script_folder = ''

srun /home/matheus.viana/anaconda3/envs/parallel/bin/python /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/global_alignment/alignment.py --fov {{ index }}
srun /home/matheus.viana/anaconda3/envs/parallel/bin/python /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/fish_morphology_code/fish_morphology_code/processing/structure_organization/global_alignment/alignment.py --fov {{ index }}
@@ -223,7 +223,7 @@ def process_fov(FOVId, df_fov):
)
parser.add_argument("--fov", help="Full path to FOV", required=True)
args = vars(parser.parse_args())
FOVId = int(args["fov"])
FOVId = args["fov"]

# Download the datasets from Quilt if there is no local copy
ds_folder = "../database/"
@@ -26,10 +26,7 @@
"alignment.j2"
)

rfolder = (
script_folder
) = "/allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/jinja2/"

rfolder = "jinja2/"
for f in ["", "scripts", "log"]:
if not os.path.exists(os.path.join(rfolder, f)):
os.makedirs(os.path.join(rfolder, f))
@@ -10,6 +10,7 @@
from scipy.ndimage.morphology import distance_transform_edt as edt
from aicssegmentation.core.vessel import vesselness3D
from aicssegmentation.core.pre_processing_utils import edge_preserving_smoothing_3d
from aicsimageio import AICSImage

from cardio_CNN_classifier import cardio_cnn

@@ -30,8 +31,8 @@
# Download from Quilt
print("No weights were found locally. Downloading from Quilt...")
pkg = Package.browse(
"matheus/assay_dev_actn2_classifier", "s3://allencell-internal-quilt"
).fetch("../best_model")
"aics/integrated_transcriptomics_structural_organization_hipsc_cm", "s3://allencell"
)["actn2_pattern_ml_classifier_model"].fetch("../best_model")
metadata = pd.read_csv(os.path.join("..", "best_model", "metadata.csv"))
model_weights_path = os.path.join("..", "best_model", metadata.model_path[0])
elif len(model_weights_path) > 1:
@@ -71,10 +72,14 @@ def segment_image(struct_img):

metadata = pd.read_csv(os.path.join(ds_folder, "metadata.csv"))

# Read fov database
df_fov = pd.read_csv(os.path.join(ds_folder, metadata.database_path[0]), index_col=0)

# Read single cell database
df_cell = pd.read_csv(os.path.join(ds_folder, metadata.cell_database_path[0]))

print(f'FOV shape: {df_fov.shape}, Cell shape: {df_cell.shape}')

# Run all FOVs
for index in df_fov.index:

@@ -86,24 +91,31 @@ def segment_image(struct_img):
nuc_ch = 1 # Channel of nuclear dye
str_ch = 5 # Channel of EGFP-alpha-actinin-2

img = skio.imread(filename)
# img = skio.imread(filename)
img = AICSImage(filename).data.squeeze()
print(img.shape)
nuc = img[int(nuc_ch)]
img = img[int(str_ch)]

# Load single cell segmentation
df_cell_sub = df_cell.loc[df_cell.RawFileName == df_fov.RawFileName[index]]
mask_name = df_cell_sub.MaskFileName.values[0]
single_cell_mask = skio.imread(mask_name)[-1]
# single_cell_mask = skio.imread(mask_name)[-1]
single_cell_mask = AICSImage(mask_name).data.squeeze()[-1]

# Structure segmentation
nslices = img.shape[0]
zprofile = img.mean(axis=-1).mean(axis=-1)
slice_number = np.argmax(zprofile)

# Sum projection
img_sum = img.sum(axis=0)

# Sub-stack at the center
img = img[slice_number - 3 : slice_number + 4]
img_binary = segment_image(img)

# Slice of highest intensity
data = img[3]
data_binary = img_binary[3]

@@ -123,6 +135,7 @@ def segment_image(struct_img):
data_lines = data_skell.copy()
data_lines[data_junc > 3] = 0

# Background detection
bkrad = 64
data_mask = data_binary.copy()
data_mask = data_mask.astype(np.uint16)
@@ -173,6 +186,7 @@ def segment_image(struct_img):
data_output = np.vstack(
[
data.reshape(-1, *dim),
img_sum.reshape(-1, *dim),
data_probs,
data_classification.reshape(-1, *dim),
single_cell_mask.reshape(-1, *dim),
@@ -16,8 +16,8 @@

# Load data from quilt
p = Package.browse(
"matheus/assay_dev_classifier_train", "s3://allencell-internal-quilt"
).fetch("data/")
"aics/integrated_transcriptomics_structural_organization_hipsc_cm", "s3://allencell"
)["actn2_pattern_ml_classifier_train"].fetch("data/")

manifest = pd.read_csv("data/metadata.csv", index_col=0)

@@ -17,10 +17,14 @@
- `Prob_Disorganized_Puncta`: Average probability of a pixel inside the cell to be classified as disorganized puncta
- `Prob_Organized_Puncta`: Average probability of a pixel inside the cell to be classified as organized puncta
- `Prob_Organized_ZDisks`: Average probability of a pixel inside the cell to be classified as organized z disks
- `IntensityMean`: Mean of GFP signal in cell mask
- `IntensityMedian`: Median of GFP signal in cell mask
- `IntensityIntegrated`: Integrated GFP signal in cell mask
- `IntensityMedianBkgSub`: Median of GFP signal in cell mask with background subtracted (10% percentile
- `IntensitySumIntegrated`: Integrated GFP signal in the sum projection of the cell mask
- `IntensityMeanBkgSub`: Mean of GFP signal in cell mask with background subtracted (10th percentile)
- `IntensityMedianBkgSub`: Median of GFP signal in cell mask with background subtracted (10th percentile)
- `IntensityIntegratedBkgSub`: Integrated GFP signal in cell mask with background subtracted (10th percentile)
- `IntensitySumIntegratedBkgSub`: Integrated GFP signal in the sum projection of the cell mask with background subtracted (nSlices x 10th percentile)
- `Maximum_Coefficient_Variation`: Maximum value of the coefficient of variation obtained from correlation plots
- `Peak_Height`: Height of the highest peak in the correlation plots
- `Peak_Distance`: Distance in pixels in which the maximum of the highest peak occurs
@@ -3,6 +3,6 @@
#SBATCH --job-name=features_fov_{{ index }}
#SBATCH --partition aics_cpu_general
#SBATCH --mem 8G
#SBATCH --output /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/jinja2/log/features_fov_{{ index }}.out
#SBATCH --output /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/fish_morphology_code/fish_morphology_code/processing/structure_organization/tools/jinja2/log/features_fov_{{ index }}.out

srun /home/matheus.viana/anaconda3/envs/parallel/bin/python /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/tools/features.py --fov {{ index }}
srun /home/matheus.viana/anaconda3/envs/parallel/bin/python /allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/fish_morphology_code/fish_morphology_code/processing/structure_organization/tools/features.py --fov {{ index }}
@@ -16,10 +16,11 @@ def process_fov(FOVId, df_fov):
"""

# Channel numbers
ch_raw = 0
ch_prs = slice(1, 6)
ch_cla = 6
ch_msk = 7
ch_raw = 0 # highest intensity slice
ch_sum = 1 # sum projection
ch_prs = slice(2, 7) # probabilities
ch_cla = 7 # classification maps
ch_msk = 8 # cell segmentation masks

source = "../output/"

@@ -48,19 +49,32 @@

probs = data[ch_prs, mask > 0].mean(axis=1)

# Intensity based features
# Intensity based features on highest intensity slice
intensities = data[ch_raw, mask > 0].flatten()

background_intensity = np.percentile(data[ch_raw], 10)
background_intensity = np.percentile(data[ch_raw], 1)

avg_intensity = np.mean(intensities)

med_intensity = np.percentile(intensities, 50)

int_intensity = np.sum(intensities)

avg_intensity_bs = np.mean(intensities - background_intensity)

med_intensity_bs = np.percentile(intensities - background_intensity, 50)

int_intensity_bs = np.sum(intensities - background_intensity)

# Intensity based features on sum projection
nslices = int(df_fov.at[CellId,'NSlices'])

sum_intensities = data[ch_sum, mask > 0].flatten()

int_sum_intensity = np.sum(sum_intensities)

int_sum_intensity_bs = np.sum(sum_intensities - nslices*background_intensity)

# Dict for features
features = {
"Total_Area": total_area,
@@ -75,11 +89,15 @@ def segment_image(struct_img):
"Prob_Disorganized_Puncta": probs[2],
"Prob_Organized_Puncta": probs[3],
"Prob_Organized_ZDisks": probs[4],
"Intensity_Mean": avg_intensity,
"Intensity_Median": med_intensity,
"Intensity_Integrated": int_intensity,
"Intensity_SumIntegrated": int_sum_intensity,
"Intensity_Mean_BackSub": avg_intensity_bs,
"Intensity_Median_BackSub": med_intensity_bs,
"Intensity_Integrated_BackSub": int_intensity_bs,
"Background_Value": background_intensity,
"Intensity_SumIntegrated_BackSub": int_sum_intensity_bs,
"Background_Value": background_intensity
}

for key, value in features.items():
Expand All @@ -102,7 +120,7 @@ def process_fov(FOVId, df_fov):
)
parser.add_argument("--fov", help="Full path to FOV", required=True)
args = vars(parser.parse_args())
FOVId = int(args["fov"])
FOVId = args["fov"]

# Download the datasets from Quilt if there is no local copy
ds_folder = "../database/"
@@ -23,13 +23,12 @@
# Load jinja template
#

j2template = jinja2.Environment(loader=jinja2.FileSystemLoader(".")).get_template(
"features.j2"
)
j2template = jinja2.Environment(loader=jinja2.FileSystemLoader(".")).get_template("features.j2")

rfolder = (
script_folder
) = "/allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/assay-dev-cardio/fish_morphology_code/fish_morphology_code/processing/structure_organization/jinja2/"
rfolder = "jinja2/"
for f in ["", "scripts", "log"]:
if not os.path.exists(os.path.join(rfolder, f)):
os.makedirs(os.path.join(rfolder, f))

for index in df_fov.index:
