Wofe CLI fixes (#482)

nmaarnio · mipeso · web-flow · commit 417e74a076a3 · 2025-02-11T09:10:48.000+02:00
---------

Co-authored-by: mipeso &lt;mikasorvoja1@gmail.com&gt;
diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py
@@ -3091,9 +3091,10 @@ def gamma_overlay_cli(input_rasters: INPUT_FILES_ARGUMENT, output_raster: OUTPUT
 # WOFE
 @app.command()
 def weights_of_evidence_calculate_weights_cli(
-    input_raster: INPUT_FILE_OPTION,
-    input_vector: INPUT_FILE_OPTION,
-    output_dir: OUTPUT_DIR_OPTION,
+    evidential_raster: INPUT_FILE_OPTION,
+    deposits: INPUT_FILE_OPTION,
+    output_results_table: OUTPUT_FILE_OPTION,
+    output_raster_dir: OUTPUT_DIR_OPTION,
     raster_nodata: Optional[float] = None,
     weights_type: Annotated[WeightsType, typer.Option(case_sensitive=False)] = WeightsType.unique,
     studentized_contrast_threshold: float = 1,
@@ -3102,6 +3103,9 @@ def weights_of_evidence_calculate_weights_cli(
     """
     Calculate weights of spatial associations.
 
+    Save path for resulting CSV is set using --output-results-table parameter. Output rasters are saved to directory
+    set with --output-raster-dir parameter.
+
     Parameter --studentized-contrast-threshold is used with 'categorical', 'ascending' and 'descending' weight types.
 
     Parameter --arrays-to-generate controls which columns in the weights dataframe are returned as arrays. All column
@@ -3115,8 +3119,13 @@ def weights_of_evidence_calculate_weights_cli(
 
     typer.echo("Progress: 10%")
 
-    evidential_raster = rasterio.open(input_raster)
-    deposits = gpd.read_file(input_vector)
+    evidential_raster = rasterio.open(evidential_raster)
+
+    if deposits.suffix in (".tif", ".tiff", ".asc", ".img", ".vrt", ".grd"):
+        deposits = rasterio.open(deposits)
+    else:
+        deposits = gpd.read_file(deposits)
+
     typer.echo("Progress: 25%")
 
     if arrays_to_generate == []:
@@ -3132,36 +3141,42 @@ def weights_of_evidence_calculate_weights_cli(
     )
     typer.echo("Progress: 75%")
 
-    df.to_csv(output_dir.joinpath("wofe_results.csv"))
+    df.to_csv(output_results_table)
 
     out_rasters_dict = {}
-    file_name = input_raster.name.split(".")[0]
+    file_name = evidential_raster.name.split("/")[-1].split(".")[0]
+    raster_meta.pop("dtype")  # Remove dtype from metadata to set it individually
+
     for key, array in arrays.items():
+        # Set correct dtype for the array
+        if key in ["Class", "Pixel count", "Deposit count"]:
+            dtype = np.uint8
+        else:
+            dtype = np.float32
+
         array = nan_to_nodata(array, raster_meta["nodata"])
         output_raster_name = file_name + "_weights_" + weights_type + "_" + key
-        output_raster_path = output_dir.joinpath(output_raster_name + ".tif")
-        with rasterio.open(output_raster_path, "w", **raster_meta) as dst:
+        output_raster_path = output_raster_dir.joinpath(output_raster_name + ".tif")
+        with rasterio.open(output_raster_path, "w", dtype=dtype, **raster_meta) as dst:
             dst.write(array, 1)
         out_rasters_dict[output_raster_name] = str(output_raster_path)
 
     json_str = json.dumps(out_rasters_dict)
     typer.echo("Progress 100%")
 
-    typer.echo(f"Number of deposit pixels: {nr_of_deposits}")
-    typer.echo(f"Number of all evidence pixels: {nr_of_pixels}")
     typer.echo(f"Output rasters: {json_str}")
-    typer.echo(f"Weight calculations completed, rasters and CSV saved to {output_dir}.")
+    typer.echo(f"Weight calculations completed, rasters saved to {output_raster_dir}.")
+    typer.echo(f"Weights table saved to {output_results_table}.")
 
 
 @app.command()
 def weights_of_evidence_calculate_responses_cli(
     input_rasters_weights: INPUT_FILES_OPTION,
     input_rasters_standard_deviations: INPUT_FILES_OPTION,
+    input_weights_table: INPUT_FILE_OPTION,
     output_probabilities: OUTPUT_FILE_OPTION,
     output_probabilities_std: OUTPUT_FILE_OPTION,
     output_confidence_array: OUTPUT_FILE_OPTION,
-    nr_of_deposits: Annotated[int, typer.Option()],
-    nr_of_pixels: Annotated[int, typer.Option()],
 ):
     """
     Calculate the posterior probabilities for the given generalized weight arrays.
@@ -3198,10 +3213,12 @@ def weights_of_evidence_calculate_responses_cli(
 
         dict_array.append({"W+": array_W, "S_W+": array_S_W})
 
+    weights_df = pd.read_csv(input_weights_table)
+
     typer.echo("Progress: 25%")
 
     posterior_probabilities, posterior_probabilies_std, confidence_array = weights_of_evidence_calculate_responses(
-        output_arrays=dict_array, nr_of_deposits=nr_of_deposits, nr_of_pixels=nr_of_pixels
+        output_arrays=dict_array, weights_df=weights_df
     )
     typer.echo("Progress: 75%")
 
@@ -3220,7 +3237,7 @@ def weights_of_evidence_calculate_responses_cli(
     typer.echo("Progress: 100%")
 
     typer.echo(
-        f"Responses calculations finished, writing output rasters to {output_probabilities}, \
+        f"Posterior probability calculations finished, output rasters saved to {output_probabilities}, \
             {output_probabilities_std} and {output_confidence_array}"
     )
 
@@ -3229,7 +3246,7 @@ def weights_of_evidence_calculate_responses_cli(
 def agterberg_cheng_CI_test_cli(
     input_posterior_probabilities: INPUT_FILE_OPTION,
     input_posterior_probabilities_std: INPUT_FILE_OPTION,
-    nr_of_deposits: Annotated[int, typer.Option()],
+    input_weights_table: INPUT_FILE_OPTION,
 ):
     """Perform the conditional independence test presented by Agterberg-Cheng (2002)."""
     from eis_toolkit.prediction.weights_of_evidence import agterberg_cheng_CI_test
@@ -3244,12 +3261,14 @@ def agterberg_cheng_CI_test_cli(
         posterior_probabilities_std = src.read(1)
         posterior_probabilities_std = nodata_to_nan(posterior_probabilities_std, src.nodata)
 
+    weights_df = pd.read_csv(input_weights_table)
+
     typer.echo("Progress: 25%")
 
     _, _, _, _, summary = agterberg_cheng_CI_test(
         posterior_probabilities=posterior_probabilities,
         posterior_probabilities_std=posterior_probabilities_std,
-        nr_of_deposits=nr_of_deposits,
+        weights_df=weights_df,
     )
 
     typer.echo("Progress: 100%")
diff --git a/eis_toolkit/prediction/weights_of_evidence.py b/eis_toolkit/prediction/weights_of_evidence.py
@@ -6,9 +6,15 @@
 import pandas as pd
 import rasterio
 from beartype import beartype
-from beartype.typing import Dict, List, Literal, Optional, Sequence, Tuple
-
-from eis_toolkit.exceptions import ClassificationFailedException, InvalidColumnException, InvalidParameterValueException
+from beartype.typing import Dict, List, Literal, Optional, Sequence, Tuple, Union
+
+from eis_toolkit.exceptions import (
+    ClassificationFailedException,
+    InvalidColumnException,
+    InvalidParameterValueException,
+    NonMatchingRasterMetadataException,
+)
+from eis_toolkit.utilities.checks.raster import check_raster_grids
 from eis_toolkit.vector_processing.rasterize_vector import rasterize_vector
 from eis_toolkit.warnings import ClassificationFailedWarning, InvalidColumnWarning
 
@@ -68,7 +74,7 @@
 }
 
 
-def _read_and_preprocess_evidence(
+def _read_and_preprocess_raster_data(
     raster: rasterio.io.DatasetReader, nodata: Optional[Number] = None, band: int = 1
 ) -> np.ndarray:
     """Read raster data and handle NoData values."""
@@ -244,6 +250,22 @@ def _generate_arrays_from_metrics(
     return array_dict
 
 
+def _calculate_nr_of_deposit_pixels(array: np.ndarray, df: pd.DataFrame) -> Tuple[int, int]:
+    masked_array = array[~np.isnan(array)]
+    nr_of_pixels = int(np.size(masked_array))
+
+    pixels_column = df["Pixel count"]
+
+    match = pixels_column == nr_of_pixels
+    if match.any():
+        nr_of_deposits = df.loc[match, "Deposit count"].iloc[0]
+    else:
+        nr_of_pixels = df["Pixel count"].sum()
+        nr_of_deposits = df["Deposit count"].sum()
+
+    return nr_of_deposits, nr_of_pixels
+
+
 @beartype
 def generalize_weights_cumulative(
     df: pd.DataFrame,
@@ -349,7 +371,7 @@ def generalize_weights_cumulative(
 @beartype
 def weights_of_evidence_calculate_weights(
     evidential_raster: rasterio.io.DatasetReader,
-    deposits: gpd.GeoDataFrame,
+    deposits: Union[gpd.GeoDataFrame, rasterio.io.DatasetReader],
     raster_nodata: Optional[Number] = None,
     weights_type: Literal["unique", "categorical", "ascending", "descending"] = "unique",
     studentized_contrast_threshold: Number = 1,
@@ -360,7 +382,7 @@ def weights_of_evidence_calculate_weights(
 
     Args:
         evidential_raster: The evidential raster.
-        deposits: Vector data representing the mineral deposits or occurences point data.
+        deposits: Vector or raster data representing the mineral deposits or occurences point data.
         raster_nodata: If nodata value of raster is wanted to specify manually. Optional parameter, defaults to None
             (nodata from raster metadata is used).
         weights_type: Accepted values are 'unique', 'categorical', 'ascending' and 'descending'.
@@ -409,13 +431,22 @@ def weights_of_evidence_calculate_weights(
         metrics_to_arrays = arrays_to_generate.copy()
 
     # 1. Preprocess data
-    evidence_array = _read_and_preprocess_evidence(evidential_raster, raster_nodata)
+    evidence_array = _read_and_preprocess_raster_data(evidential_raster, raster_nodata)
     raster_meta = evidential_raster.meta
+    raster_profile = evidential_raster.profile
 
-    # Rasterize deposits
-    deposit_array = rasterize_vector(
-        geodataframe=deposits, raster_profile=raster_meta, default_value=1.0, fill_value=0.0
-    )
+    # Rasterize deposits if vector data
+    if isinstance(deposits, gpd.GeoDataFrame):
+        deposit_array = rasterize_vector(
+            geodataframe=deposits, raster_profile=raster_meta, default_value=1.0, fill_value=0.0
+        )
+    else:
+        deposit_profile = deposits.profile
+
+        if check_raster_grids([raster_profile, deposit_profile], same_extent=True):
+            deposit_array = _read_and_preprocess_raster_data(deposits, raster_nodata)
+        else:
+            raise NonMatchingRasterMetadataException("Input rasters should have the same grid properties.")
 
     # Mask NaN out of the array
     nodata_mask = np.isnan(evidence_array)
@@ -482,7 +513,7 @@ def weights_of_evidence_calculate_weights(
 
 @beartype
 def weights_of_evidence_calculate_responses(
-    output_arrays: Sequence[Dict[str, np.ndarray]], nr_of_deposits: int, nr_of_pixels: int
+    output_arrays: Sequence[Dict[str, np.ndarray]], weights_df: pd.DataFrame
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
     """Calculate the posterior probabilities for the given generalized weight arrays.
 
@@ -491,14 +522,17 @@ def weights_of_evidence_calculate_responses(
             For each dictionary, generalized weight and generalized standard deviation arrays are used and summed
             together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found,
             the W+ and S_W+ arrays are used (so if outputs from unique weight calculations are used for this function).
-        nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations.
-        nr_of_pixels: Number of evidence pixels in the input data for weights of evidence calculations.
+        weights_df: Output dataframe of WofE calculate weights algorithm. Used for determining number of deposits and
+            number of pixels.
 
     Returns:
         Array of posterior probabilites.
         Array of standard deviations in the posterior probability calculations.
         Array of confidence of the prospectivity values obtained in the posterior probability array.
     """
+    array = list(output_arrays[0].values())[0]
+    nr_of_deposits, nr_of_pixels = _calculate_nr_of_deposit_pixels(array, weights_df)
+
     gen_weights_sum = sum(
         [
             item[GENERALIZED_WEIGHT_PLUS_COLUMN]
@@ -531,7 +565,7 @@ def weights_of_evidence_calculate_responses(
 
 @beartype
 def agterberg_cheng_CI_test(
-    posterior_probabilities: np.ndarray, posterior_probabilities_std: np.ndarray, nr_of_deposits: int
+    posterior_probabilities: np.ndarray, posterior_probabilities_std: np.ndarray, weights_df: pd.DataFrame
 ) -> Tuple[bool, bool, bool, float, str]:
     """Perform the conditional independence test presented by Agterberg-Cheng (2002).
 
@@ -541,7 +575,8 @@ def agterberg_cheng_CI_test(
     Args:
         posterior_probabilities: Array of posterior probabilites.
         posterior_probabilities_std: Array of standard deviations in the posterior probability calculations.
-        nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations.
+        weights_df: Output dataframe of WofE calculate weights algorithm. Used for determining number of deposits.
+
     Returns:
         Whether the conditional hypothesis can be accepted for the evidence layers that the input
             posterior probabilities and standard deviations of posterior probabilities are calculated from.
@@ -550,12 +585,8 @@ def agterberg_cheng_CI_test(
         Ratio T/n. Results > 1, may be because of lack of conditional independence of layers.
             T should not exceed n by more than 15% (Bonham-Carter 1994, p. 316).
         A summary of the the conditional independence calculations.
-
-    Raises:
-        InvalidParameterValueException: Value of nr_of_deposits is not at least 1.
     """
-    if nr_of_deposits < 1:
-        raise InvalidParameterValueException("Expected input deposits count to be at least 1.")
+    nr_of_deposits, _ = _calculate_nr_of_deposit_pixels(posterior_probabilities, weights_df)
 
     # One-tailed significance test according to Agterberg-Cheng (2002):
     # Conditional independence must satisfy:
diff --git a/eis_toolkit/utilities/checks/raster.py b/eis_toolkit/utilities/checks/raster.py
@@ -3,6 +3,7 @@
 import rasterio.transform
 from beartype import beartype
 from beartype.typing import Iterable, Sequence, Union
+from rasterio.crs import CRS
 
 from eis_toolkit.exceptions import InvalidParameterValueException
 
@@ -37,21 +38,25 @@ def check_matching_crs(objects: Iterable) -> bool:
     Returns:
         True if everything matches, False if not.
     """
-    epsg_list = []
+    crs_list = []
 
     for object in objects:
         if not isinstance(object, (rasterio.profiles.Profile, dict)):
             if not object.crs:
                 return False
             epsg = object.crs.to_epsg()
-            epsg_list.append(epsg)
+            crs_list.append(epsg)
         else:
             if "crs" in object:
-                epsg_list.append(object["crs"])
+                crs_object = object["crs"]
+                if type(crs_object) == CRS:
+                    crs_list.append(crs_object.to_epsg())
+                else:
+                    crs_list.append(crs_object)
             else:
                 return False
 
-    if len(set(epsg_list)) != 1:
+    if len(set(crs_list)) != 1:
         return False
 
     return True
@@ -119,6 +124,7 @@ def check_raster_grids(
     Returns:
         True if gridding and optionally bounds matches, False if not.
     """
+
     if not check_matching_crs(raster_profiles):
         return False
     if not check_matching_pixel_alignment(raster_profiles):