Skip to content

Commit 417e74a

Browse files
nmaarniomipeso
andauthored
Wofe CLI fixes (#482)
--------- Co-authored-by: mipeso <[email protected]>
1 parent f6fac24 commit 417e74a

File tree

3 files changed

+99
-43
lines changed

3 files changed

+99
-43
lines changed

eis_toolkit/cli.py

+37-18
Original file line numberDiff line numberDiff line change
@@ -3091,9 +3091,10 @@ def gamma_overlay_cli(input_rasters: INPUT_FILES_ARGUMENT, output_raster: OUTPUT
30913091
# WOFE
30923092
@app.command()
30933093
def weights_of_evidence_calculate_weights_cli(
3094-
input_raster: INPUT_FILE_OPTION,
3095-
input_vector: INPUT_FILE_OPTION,
3096-
output_dir: OUTPUT_DIR_OPTION,
3094+
evidential_raster: INPUT_FILE_OPTION,
3095+
deposits: INPUT_FILE_OPTION,
3096+
output_results_table: OUTPUT_FILE_OPTION,
3097+
output_raster_dir: OUTPUT_DIR_OPTION,
30973098
raster_nodata: Optional[float] = None,
30983099
weights_type: Annotated[WeightsType, typer.Option(case_sensitive=False)] = WeightsType.unique,
30993100
studentized_contrast_threshold: float = 1,
@@ -3102,6 +3103,9 @@ def weights_of_evidence_calculate_weights_cli(
31023103
"""
31033104
Calculate weights of spatial associations.
31043105
3106+
Save path for resulting CSV is set using --output-results-table parameter. Output rasters are saved to directory
3107+
set with --output-raster-dir parameter.
3108+
31053109
Parameter --studentized-contrast-threshold is used with 'categorical', 'ascending' and 'descending' weight types.
31063110
31073111
Parameter --arrays-to-generate controls which columns in the weights dataframe are returned as arrays. All column
@@ -3115,8 +3119,13 @@ def weights_of_evidence_calculate_weights_cli(
31153119

31163120
typer.echo("Progress: 10%")
31173121

3118-
evidential_raster = rasterio.open(input_raster)
3119-
deposits = gpd.read_file(input_vector)
3122+
evidential_raster = rasterio.open(evidential_raster)
3123+
3124+
if deposits.suffix in (".tif", ".tiff", ".asc", ".img", ".vrt", ".grd"):
3125+
deposits = rasterio.open(deposits)
3126+
else:
3127+
deposits = gpd.read_file(deposits)
3128+
31203129
typer.echo("Progress: 25%")
31213130

31223131
if arrays_to_generate == []:
@@ -3132,36 +3141,42 @@ def weights_of_evidence_calculate_weights_cli(
31323141
)
31333142
typer.echo("Progress: 75%")
31343143

3135-
df.to_csv(output_dir.joinpath("wofe_results.csv"))
3144+
df.to_csv(output_results_table)
31363145

31373146
out_rasters_dict = {}
3138-
file_name = input_raster.name.split(".")[0]
3147+
file_name = evidential_raster.name.split("/")[-1].split(".")[0]
3148+
raster_meta.pop("dtype") # Remove dtype from metadata to set it individually
3149+
31393150
for key, array in arrays.items():
3151+
# Set correct dtype for the array
3152+
if key in ["Class", "Pixel count", "Deposit count"]:
3153+
dtype = np.uint8
3154+
else:
3155+
dtype = np.float32
3156+
31403157
array = nan_to_nodata(array, raster_meta["nodata"])
31413158
output_raster_name = file_name + "_weights_" + weights_type + "_" + key
3142-
output_raster_path = output_dir.joinpath(output_raster_name + ".tif")
3143-
with rasterio.open(output_raster_path, "w", **raster_meta) as dst:
3159+
output_raster_path = output_raster_dir.joinpath(output_raster_name + ".tif")
3160+
with rasterio.open(output_raster_path, "w", dtype=dtype, **raster_meta) as dst:
31443161
dst.write(array, 1)
31453162
out_rasters_dict[output_raster_name] = str(output_raster_path)
31463163

31473164
json_str = json.dumps(out_rasters_dict)
31483165
typer.echo("Progress 100%")
31493166

3150-
typer.echo(f"Number of deposit pixels: {nr_of_deposits}")
3151-
typer.echo(f"Number of all evidence pixels: {nr_of_pixels}")
31523167
typer.echo(f"Output rasters: {json_str}")
3153-
typer.echo(f"Weight calculations completed, rasters and CSV saved to {output_dir}.")
3168+
typer.echo(f"Weight calculations completed, rasters saved to {output_raster_dir}.")
3169+
typer.echo(f"Weights table saved to {output_results_table}.")
31543170

31553171

31563172
@app.command()
31573173
def weights_of_evidence_calculate_responses_cli(
31583174
input_rasters_weights: INPUT_FILES_OPTION,
31593175
input_rasters_standard_deviations: INPUT_FILES_OPTION,
3176+
input_weights_table: INPUT_FILE_OPTION,
31603177
output_probabilities: OUTPUT_FILE_OPTION,
31613178
output_probabilities_std: OUTPUT_FILE_OPTION,
31623179
output_confidence_array: OUTPUT_FILE_OPTION,
3163-
nr_of_deposits: Annotated[int, typer.Option()],
3164-
nr_of_pixels: Annotated[int, typer.Option()],
31653180
):
31663181
"""
31673182
Calculate the posterior probabilities for the given generalized weight arrays.
@@ -3198,10 +3213,12 @@ def weights_of_evidence_calculate_responses_cli(
31983213

31993214
dict_array.append({"W+": array_W, "S_W+": array_S_W})
32003215

3216+
weights_df = pd.read_csv(input_weights_table)
3217+
32013218
typer.echo("Progress: 25%")
32023219

32033220
posterior_probabilities, posterior_probabilies_std, confidence_array = weights_of_evidence_calculate_responses(
3204-
output_arrays=dict_array, nr_of_deposits=nr_of_deposits, nr_of_pixels=nr_of_pixels
3221+
output_arrays=dict_array, weights_df=weights_df
32053222
)
32063223
typer.echo("Progress: 75%")
32073224

@@ -3220,7 +3237,7 @@ def weights_of_evidence_calculate_responses_cli(
32203237
typer.echo("Progress: 100%")
32213238

32223239
typer.echo(
3223-
f"Responses calculations finished, writing output rasters to {output_probabilities}, \
3240+
f"Posterior probability calculations finished, output rasters saved to {output_probabilities}, \
32243241
{output_probabilities_std} and {output_confidence_array}"
32253242
)
32263243

@@ -3229,7 +3246,7 @@ def weights_of_evidence_calculate_responses_cli(
32293246
def agterberg_cheng_CI_test_cli(
32303247
input_posterior_probabilities: INPUT_FILE_OPTION,
32313248
input_posterior_probabilities_std: INPUT_FILE_OPTION,
3232-
nr_of_deposits: Annotated[int, typer.Option()],
3249+
input_weights_table: INPUT_FILE_OPTION,
32333250
):
32343251
"""Perform the conditional independence test presented by Agterberg-Cheng (2002)."""
32353252
from eis_toolkit.prediction.weights_of_evidence import agterberg_cheng_CI_test
@@ -3244,12 +3261,14 @@ def agterberg_cheng_CI_test_cli(
32443261
posterior_probabilities_std = src.read(1)
32453262
posterior_probabilities_std = nodata_to_nan(posterior_probabilities_std, src.nodata)
32463263

3264+
weights_df = pd.read_csv(input_weights_table)
3265+
32473266
typer.echo("Progress: 25%")
32483267

32493268
_, _, _, _, summary = agterberg_cheng_CI_test(
32503269
posterior_probabilities=posterior_probabilities,
32513270
posterior_probabilities_std=posterior_probabilities_std,
3252-
nr_of_deposits=nr_of_deposits,
3271+
weights_df=weights_df,
32533272
)
32543273

32553274
typer.echo("Progress: 100%")

eis_toolkit/prediction/weights_of_evidence.py

+52-21
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,15 @@
66
import pandas as pd
77
import rasterio
88
from beartype import beartype
9-
from beartype.typing import Dict, List, Literal, Optional, Sequence, Tuple
10-
11-
from eis_toolkit.exceptions import ClassificationFailedException, InvalidColumnException, InvalidParameterValueException
9+
from beartype.typing import Dict, List, Literal, Optional, Sequence, Tuple, Union
10+
11+
from eis_toolkit.exceptions import (
12+
ClassificationFailedException,
13+
InvalidColumnException,
14+
InvalidParameterValueException,
15+
NonMatchingRasterMetadataException,
16+
)
17+
from eis_toolkit.utilities.checks.raster import check_raster_grids
1218
from eis_toolkit.vector_processing.rasterize_vector import rasterize_vector
1319
from eis_toolkit.warnings import ClassificationFailedWarning, InvalidColumnWarning
1420

@@ -68,7 +74,7 @@
6874
}
6975

7076

71-
def _read_and_preprocess_evidence(
77+
def _read_and_preprocess_raster_data(
7278
raster: rasterio.io.DatasetReader, nodata: Optional[Number] = None, band: int = 1
7379
) -> np.ndarray:
7480
"""Read raster data and handle NoData values."""
@@ -244,6 +250,22 @@ def _generate_arrays_from_metrics(
244250
return array_dict
245251

246252

253+
def _calculate_nr_of_deposit_pixels(array: np.ndarray, df: pd.DataFrame) -> Tuple[int, int]:
254+
masked_array = array[~np.isnan(array)]
255+
nr_of_pixels = int(np.size(masked_array))
256+
257+
pixels_column = df["Pixel count"]
258+
259+
match = pixels_column == nr_of_pixels
260+
if match.any():
261+
nr_of_deposits = df.loc[match, "Deposit count"].iloc[0]
262+
else:
263+
nr_of_pixels = df["Pixel count"].sum()
264+
nr_of_deposits = df["Deposit count"].sum()
265+
266+
return nr_of_deposits, nr_of_pixels
267+
268+
247269
@beartype
248270
def generalize_weights_cumulative(
249271
df: pd.DataFrame,
@@ -349,7 +371,7 @@ def generalize_weights_cumulative(
349371
@beartype
350372
def weights_of_evidence_calculate_weights(
351373
evidential_raster: rasterio.io.DatasetReader,
352-
deposits: gpd.GeoDataFrame,
374+
deposits: Union[gpd.GeoDataFrame, rasterio.io.DatasetReader],
353375
raster_nodata: Optional[Number] = None,
354376
weights_type: Literal["unique", "categorical", "ascending", "descending"] = "unique",
355377
studentized_contrast_threshold: Number = 1,
@@ -360,7 +382,7 @@ def weights_of_evidence_calculate_weights(
360382
361383
Args:
362384
evidential_raster: The evidential raster.
363-
deposits: Vector data representing the mineral deposits or occurences point data.
385+
deposits: Vector or raster data representing the mineral deposits or occurences point data.
364386
raster_nodata: If nodata value of raster is wanted to specify manually. Optional parameter, defaults to None
365387
(nodata from raster metadata is used).
366388
weights_type: Accepted values are 'unique', 'categorical', 'ascending' and 'descending'.
@@ -409,13 +431,22 @@ def weights_of_evidence_calculate_weights(
409431
metrics_to_arrays = arrays_to_generate.copy()
410432

411433
# 1. Preprocess data
412-
evidence_array = _read_and_preprocess_evidence(evidential_raster, raster_nodata)
434+
evidence_array = _read_and_preprocess_raster_data(evidential_raster, raster_nodata)
413435
raster_meta = evidential_raster.meta
436+
raster_profile = evidential_raster.profile
414437

415-
# Rasterize deposits
416-
deposit_array = rasterize_vector(
417-
geodataframe=deposits, raster_profile=raster_meta, default_value=1.0, fill_value=0.0
418-
)
438+
# Rasterize deposits if vector data
439+
if isinstance(deposits, gpd.GeoDataFrame):
440+
deposit_array = rasterize_vector(
441+
geodataframe=deposits, raster_profile=raster_meta, default_value=1.0, fill_value=0.0
442+
)
443+
else:
444+
deposit_profile = deposits.profile
445+
446+
if check_raster_grids([raster_profile, deposit_profile], same_extent=True):
447+
deposit_array = _read_and_preprocess_raster_data(deposits, raster_nodata)
448+
else:
449+
raise NonMatchingRasterMetadataException("Input rasters should have the same grid properties.")
419450

420451
# Mask NaN out of the array
421452
nodata_mask = np.isnan(evidence_array)
@@ -482,7 +513,7 @@ def weights_of_evidence_calculate_weights(
482513

483514
@beartype
484515
def weights_of_evidence_calculate_responses(
485-
output_arrays: Sequence[Dict[str, np.ndarray]], nr_of_deposits: int, nr_of_pixels: int
516+
output_arrays: Sequence[Dict[str, np.ndarray]], weights_df: pd.DataFrame
486517
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
487518
"""Calculate the posterior probabilities for the given generalized weight arrays.
488519
@@ -491,14 +522,17 @@ def weights_of_evidence_calculate_responses(
491522
For each dictionary, generalized weight and generalized standard deviation arrays are used and summed
492523
together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found,
493524
the W+ and S_W+ arrays are used (so if outputs from unique weight calculations are used for this function).
494-
nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations.
495-
nr_of_pixels: Number of evidence pixels in the input data for weights of evidence calculations.
525+
weights_df: Output dataframe of WofE calculate weights algorithm. Used for determining number of deposits and
526+
number of pixels.
496527
497528
Returns:
498529
Array of posterior probabilites.
499530
Array of standard deviations in the posterior probability calculations.
500531
Array of confidence of the prospectivity values obtained in the posterior probability array.
501532
"""
533+
array = list(output_arrays[0].values())[0]
534+
nr_of_deposits, nr_of_pixels = _calculate_nr_of_deposit_pixels(array, weights_df)
535+
502536
gen_weights_sum = sum(
503537
[
504538
item[GENERALIZED_WEIGHT_PLUS_COLUMN]
@@ -531,7 +565,7 @@ def weights_of_evidence_calculate_responses(
531565

532566
@beartype
533567
def agterberg_cheng_CI_test(
534-
posterior_probabilities: np.ndarray, posterior_probabilities_std: np.ndarray, nr_of_deposits: int
568+
posterior_probabilities: np.ndarray, posterior_probabilities_std: np.ndarray, weights_df: pd.DataFrame
535569
) -> Tuple[bool, bool, bool, float, str]:
536570
"""Perform the conditional independence test presented by Agterberg-Cheng (2002).
537571
@@ -541,7 +575,8 @@ def agterberg_cheng_CI_test(
541575
Args:
542576
posterior_probabilities: Array of posterior probabilites.
543577
posterior_probabilities_std: Array of standard deviations in the posterior probability calculations.
544-
nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations.
578+
weights_df: Output dataframe of WofE calculate weights algorithm. Used for determining number of deposits.
579+
545580
Returns:
546581
Whether the conditional hypothesis can be accepted for the evidence layers that the input
547582
posterior probabilities and standard deviations of posterior probabilities are calculated from.
@@ -550,12 +585,8 @@ def agterberg_cheng_CI_test(
550585
Ratio T/n. Results > 1, may be because of lack of conditional independence of layers.
551586
T should not exceed n by more than 15% (Bonham-Carter 1994, p. 316).
552587
A summary of the the conditional independence calculations.
553-
554-
Raises:
555-
InvalidParameterValueException: Value of nr_of_deposits is not at least 1.
556588
"""
557-
if nr_of_deposits < 1:
558-
raise InvalidParameterValueException("Expected input deposits count to be at least 1.")
589+
nr_of_deposits, _ = _calculate_nr_of_deposit_pixels(posterior_probabilities, weights_df)
559590

560591
# One-tailed significance test according to Agterberg-Cheng (2002):
561592
# Conditional independence must satisfy:

eis_toolkit/utilities/checks/raster.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import rasterio.transform
44
from beartype import beartype
55
from beartype.typing import Iterable, Sequence, Union
6+
from rasterio.crs import CRS
67

78
from eis_toolkit.exceptions import InvalidParameterValueException
89

@@ -37,21 +38,25 @@ def check_matching_crs(objects: Iterable) -> bool:
3738
Returns:
3839
True if everything matches, False if not.
3940
"""
40-
epsg_list = []
41+
crs_list = []
4142

4243
for object in objects:
4344
if not isinstance(object, (rasterio.profiles.Profile, dict)):
4445
if not object.crs:
4546
return False
4647
epsg = object.crs.to_epsg()
47-
epsg_list.append(epsg)
48+
crs_list.append(epsg)
4849
else:
4950
if "crs" in object:
50-
epsg_list.append(object["crs"])
51+
crs_object = object["crs"]
52+
if type(crs_object) == CRS:
53+
crs_list.append(crs_object.to_epsg())
54+
else:
55+
crs_list.append(crs_object)
5156
else:
5257
return False
5358

54-
if len(set(epsg_list)) != 1:
59+
if len(set(crs_list)) != 1:
5560
return False
5661

5762
return True
@@ -119,6 +124,7 @@ def check_raster_grids(
119124
Returns:
120125
True if gridding and optionally bounds matches, False if not.
121126
"""
127+
122128
if not check_matching_crs(raster_profiles):
123129
return False
124130
if not check_matching_pixel_alignment(raster_profiles):

0 commit comments

Comments
 (0)