Skip to content

Commit e7354ca

Browse files
authored
Merge pull request #390 from GispoCoding/389-separate-predicting-and-testing-cli-functions-for-classifier-and-regressor-models
389 refactor ML model predicting and testing
2 parents cca25dd + 493e6c6 commit e7354ca

File tree

4 files changed

+159
-74
lines changed

4 files changed

+159
-74
lines changed

eis_toolkit/cli.py

+117-63
Original file line numberDiff line numberDiff line change
@@ -2313,97 +2313,162 @@ def gradient_boosting_regressor_train_cli(
23132313
typer.echo("Gradient boosting regressor training completed")
23142314

23152315

2316-
# EVALUATE ML MODEL
2316+
# TEST CLASSIFIER ML MODEL
23172317
@app.command()
2318-
def evaluate_trained_model_cli(
2318+
def classifier_test_cli(
23192319
input_rasters: INPUT_FILES_ARGUMENT,
23202320
target_labels: INPUT_FILE_OPTION,
23212321
model_file: INPUT_FILE_OPTION,
2322-
output_raster: OUTPUT_FILE_OPTION,
2323-
validation_metrics: Annotated[List[str], typer.Option()],
2322+
output_raster_probability: OUTPUT_FILE_OPTION,
2323+
output_raster_classified: OUTPUT_FILE_OPTION,
2324+
classification_threshold: float = 0.5,
2325+
test_metrics: Annotated[List[ClassifierMetrics], typer.Option(case_sensitive=False)] = [ClassifierMetrics.accuracy],
23242326
):
2325-
"""Predict and evaluate a trained machine learning model by predicting and scoring."""
2326-
from sklearn.base import is_classifier
2327-
2327+
"""Test trained machine learning classifier model by predicting and scoring."""
23282328
from eis_toolkit.evaluation.scoring import score_predictions
23292329
from eis_toolkit.prediction.machine_learning_general import load_model, prepare_data_for_ml, reshape_predictions
2330-
from eis_toolkit.prediction.machine_learning_predict import predict_classifier, predict_regressor
2330+
from eis_toolkit.prediction.machine_learning_predict import predict_classifier
23312331

23322332
X, y, reference_profile, nodata_mask = prepare_data_for_ml(input_rasters, target_labels)
23332333
typer.echo("Progress: 30%")
23342334

23352335
model = load_model(model_file)
2336-
if is_classifier(model):
2337-
predictions, probabilities = predict_classifier(X, model, True)
2338-
probabilities = probabilities[:, 1]
2339-
probabilities = probabilities.astype(np.float32)
2340-
probabilities_reshaped = reshape_predictions(
2341-
probabilities, reference_profile["height"], reference_profile["width"], nodata_mask
2336+
predictions, probabilities = predict_classifier(X, model, classification_threshold, True)
2337+
probabilities_reshaped = reshape_predictions(
2338+
probabilities, reference_profile["height"], reference_profile["width"], nodata_mask
2339+
)
2340+
predictions_reshaped = reshape_predictions(
2341+
predictions, reference_profile["height"], reference_profile["width"], nodata_mask
2342+
)
2343+
2344+
metrics_dict = score_predictions(y, predictions, get_enum_values(test_metrics))
2345+
typer.echo("Progress: 80%")
2346+
2347+
out_profile = reference_profile.copy()
2348+
out_profile.update({"count": 1, "dtype": np.float32})
2349+
2350+
with rasterio.open(output_raster_probability, "w", **out_profile) as dst:
2351+
dst.write(probabilities_reshaped, 1)
2352+
with rasterio.open(output_raster_classified, "w", **out_profile) as dst:
2353+
dst.write(predictions_reshaped, 1)
2354+
2355+
typer.echo("\n")
2356+
for key, value in metrics_dict.items():
2357+
typer.echo(f"{key}: {value}")
2358+
typer.echo("\n")
2359+
2360+
typer.echo("Progress: 100%")
2361+
typer.echo(
2362+
(
2363+
"Testing classifier model completed, writing rasters to "
2364+
f"{output_raster_probability} and {output_raster_classified}."
23422365
)
2343-
else:
2344-
predictions = predict_regressor(X, model)
2366+
)
23452367

2346-
metrics_dict = score_predictions(y, predictions, validation_metrics)
23472368

2369+
# TEST REGRESSOR ML MODEL
2370+
@app.command()
2371+
def regressor_test_cli(
2372+
input_rasters: INPUT_FILES_ARGUMENT,
2373+
target_labels: INPUT_FILE_OPTION,
2374+
model_file: INPUT_FILE_OPTION,
2375+
output_raster: OUTPUT_FILE_OPTION,
2376+
test_metrics: Annotated[List[RegressorMetrics], typer.Option(case_sensitive=False)] = [RegressorMetrics.mse],
2377+
):
2378+
"""Test trained machine learning regressor model by predicting and scoring."""
2379+
from eis_toolkit.evaluation.scoring import score_predictions
2380+
from eis_toolkit.prediction.machine_learning_general import load_model, prepare_data_for_ml, reshape_predictions
2381+
from eis_toolkit.prediction.machine_learning_predict import predict_regressor
2382+
2383+
X, y, reference_profile, nodata_mask = prepare_data_for_ml(input_rasters, target_labels)
2384+
typer.echo("Progress: 30%")
2385+
2386+
model = load_model(model_file)
2387+
predictions = predict_regressor(X, model)
23482388
predictions_reshaped = reshape_predictions(
23492389
predictions, reference_profile["height"], reference_profile["width"], nodata_mask
23502390
)
23512391

2392+
metrics_dict = score_predictions(y, predictions, get_enum_values(test_metrics))
23522393
typer.echo("Progress: 80%")
23532394

2354-
json_str = json.dumps(metrics_dict)
2355-
23562395
out_profile = reference_profile.copy()
23572396
out_profile.update({"count": 1, "dtype": np.float32})
23582397

2359-
if is_classifier(model):
2360-
directory = os.path.split(output_raster)[0]
2361-
name = os.path.splitext(os.path.basename(output_raster))[0]
2362-
labels_output = os.path.join(directory, name + "_labels" + ".tif")
2363-
probabilities_output = os.path.join(directory, name + "_probabilities" + ".tif")
2364-
for output_path, output_data in zip(
2365-
[labels_output, probabilities_output], [predictions_reshaped, probabilities_reshaped]
2366-
):
2367-
with rasterio.open(output_path, "w", **out_profile) as dst:
2368-
dst.write(output_data, 1)
2369-
else:
2370-
with rasterio.open(output_raster, "w", **out_profile) as dst:
2371-
dst.write(predictions_reshaped, 1)
2398+
with rasterio.open(output_raster, "w", **out_profile) as dst:
2399+
dst.write(predictions_reshaped, 1)
23722400

2373-
typer.echo("Progress: 100%")
2374-
typer.echo(f"Results: {json_str}")
2401+
typer.echo("\n")
2402+
for key, value in metrics_dict.items():
2403+
typer.echo(f"{key}: {value}")
2404+
typer.echo("\n")
23752405

2376-
typer.echo("Evaluating trained model completed")
2406+
typer.echo("Progress: 100%\n")
2407+
2408+
typer.echo(f"Testing regressor model completed, writing raster to {output_raster}.")
23772409

23782410

23792411
# PREDICT WITH TRAINED CLASSIFIER ML MODEL
23802412
@app.command()
2381-
def predict_with_trained_model_cli(
2413+
def classifier_predict_cli(
23822414
input_rasters: INPUT_FILES_ARGUMENT,
23832415
model_file: INPUT_FILE_OPTION,
2384-
output_raster: OUTPUT_FILE_OPTION,
2416+
output_raster_probability: OUTPUT_FILE_OPTION,
2417+
output_raster_classified: OUTPUT_FILE_OPTION,
2418+
classification_threshold: float = 0.5,
23852419
):
2386-
"""Predict with a trained machine learning model."""
2387-
from sklearn.base import is_classifier
2388-
2420+
"""Predict with a trained machine learning classifier model."""
23892421
from eis_toolkit.prediction.machine_learning_general import load_model, prepare_data_for_ml, reshape_predictions
2390-
from eis_toolkit.prediction.machine_learning_predict import predict_classifier, predict_regressor
2422+
from eis_toolkit.prediction.machine_learning_predict import predict_classifier
23912423

23922424
X, _, reference_profile, nodata_mask = prepare_data_for_ml(input_rasters)
23932425

23942426
typer.echo("Progress: 30%")
23952427

23962428
model = load_model(model_file)
2397-
if is_classifier(model):
2398-
predictions, probabilities = predict_classifier(X, model, True)
2399-
probabilities = probabilities[:, 1]
2400-
probabilities = probabilities.astype(np.float32)
2401-
probabilities_reshaped = reshape_predictions(
2402-
probabilities, reference_profile["height"], reference_profile["width"], nodata_mask
2429+
predictions, probabilities = predict_classifier(X, model, classification_threshold, True)
2430+
probabilities_reshaped = reshape_predictions(
2431+
probabilities, reference_profile["height"], reference_profile["width"], nodata_mask
2432+
)
2433+
predictions_reshaped = reshape_predictions(
2434+
predictions, reference_profile["height"], reference_profile["width"], nodata_mask
2435+
)
2436+
typer.echo("Progress: 80%")
2437+
2438+
out_profile = reference_profile.copy()
2439+
out_profile.update({"count": 1, "dtype": np.float32})
2440+
2441+
with rasterio.open(output_raster_probability, "w", **out_profile) as dst:
2442+
dst.write(probabilities_reshaped, 1)
2443+
with rasterio.open(output_raster_classified, "w", **out_profile) as dst:
2444+
dst.write(predictions_reshaped, 1)
2445+
2446+
typer.echo("Progress: 100%")
2447+
typer.echo(
2448+
(
2449+
"Predicting with classifier model completed, writing rasters to "
2450+
f"{output_raster_probability} and {output_raster_classified}."
24032451
)
2404-
else:
2405-
predictions = predict_regressor(X, model)
2452+
)
2453+
24062454

2455+
# PREDICT WITH TRAINED REGRESSOR ML MODEL
2456+
@app.command()
2457+
def regressor_predict_cli(
2458+
input_rasters: INPUT_FILES_ARGUMENT,
2459+
model_file: INPUT_FILE_OPTION,
2460+
output_raster: OUTPUT_FILE_OPTION,
2461+
):
2462+
"""Predict with a trained machine learning regressor model."""
2463+
from eis_toolkit.prediction.machine_learning_general import load_model, prepare_data_for_ml, reshape_predictions
2464+
from eis_toolkit.prediction.machine_learning_predict import predict_regressor
2465+
2466+
X, _, reference_profile, nodata_mask = prepare_data_for_ml(input_rasters)
2467+
2468+
typer.echo("Progress: 30%")
2469+
2470+
model = load_model(model_file)
2471+
predictions = predict_regressor(X, model)
24072472
predictions_reshaped = reshape_predictions(
24082473
predictions, reference_profile["height"], reference_profile["width"], nodata_mask
24092474
)
@@ -2413,22 +2478,11 @@ def predict_with_trained_model_cli(
24132478
out_profile = reference_profile.copy()
24142479
out_profile.update({"count": 1, "dtype": np.float32})
24152480

2416-
if is_classifier(model):
2417-
directory = os.path.split(output_raster)[0]
2418-
name = os.path.splitext(os.path.basename(output_raster))[0]
2419-
labels_output = os.path.join(directory, name + "_labels" + ".tif")
2420-
probabilities_output = os.path.join(directory, name + "_probabilities" + ".tif")
2421-
for output_path, output_data in zip(
2422-
[labels_output, probabilities_output], [predictions_reshaped, probabilities_reshaped]
2423-
):
2424-
with rasterio.open(output_path, "w", **out_profile) as dst:
2425-
dst.write(output_data, 1)
2426-
else:
2427-
with rasterio.open(output_raster, "w", **out_profile) as dst:
2428-
dst.write(predictions_reshaped, 1)
2481+
with rasterio.open(output_raster, "w", **out_profile) as dst:
2482+
dst.write(predictions_reshaped, 1)
24292483

24302484
typer.echo("Progress: 100%")
2431-
typer.echo("Predicting completed")
2485+
typer.echo(f"Predicting with regressor model completed, writing raster to {output_raster}.")
24322486

24332487

24342488
# FUZZY OVERLAYS

eis_toolkit/exceptions.py

+4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ class InvalidDataShapeException(Exception):
3434
"""Exception error for datasets with invalid shapes."""
3535

3636

37+
class InvalidModelTypeException(Exception):
38+
"""Exception error for invalid model type."""
39+
40+
3741
class InvalidParameterValueException(Exception):
3842
"""Exception error class for invalid parameter values."""
3943

eis_toolkit/prediction/machine_learning_predict.py

+36-9
Original file line numberDiff line numberDiff line change
@@ -2,40 +2,62 @@
22
import pandas as pd
33
from beartype import beartype
44
from beartype.typing import Tuple, Union
5-
from sklearn.base import BaseEstimator
5+
from sklearn.base import BaseEstimator, is_classifier
66
from tensorflow import keras
77

8+
from eis_toolkit.exceptions import InvalidModelTypeException
9+
810

911
@beartype
1012
def predict_classifier(
11-
data: Union[np.ndarray, pd.DataFrame], model: Union[BaseEstimator, keras.Model], include_probabilities: bool = True
13+
data: Union[np.ndarray, pd.DataFrame],
14+
model: Union[BaseEstimator, keras.Model],
15+
classification_threshold: float = 0.5,
16+
include_probabilities: bool = True,
1217
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
1318
"""
14-
Predict with a trained model.
19+
Predict with a trained classifier model.
1520
1621
Args:
1722
data: Data used to make predictions.
1823
model: Trained classifier. Can be any classifier model trained with
1924
EIS Toolkit (Sklearn and Keras models).
25+
classification_threshold: Threshold for classifying based on probabilities. Only used for
26+
binary classification. Defaults to 0.5.
2027
include_probabilities: If the probability array should be returned too. Defaults to True.
2128
2229
Returns:
23-
Predicted labels and optionally predicted probabilities by a classifier model.
30+
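Predicted labels and, optionally, predicted probabilities by a classifier model. Labels are a
one-dimensional array. Probabilities are one-dimensional (positive class) for binary classification
and two-dimensional (one column per class) for multiclass classification.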
31+
32+
Raises:
33+
InvalidModelTypeException: Input model is not a classifier model.
2434
"""
2535
if isinstance(model, keras.Model):
26-
probabilities = model.predict(data)
27-
labels = probabilities.argmax(axis=-1)
36+
probabilities = model.predict(data).astype(np.float32)
37+
if probabilities.shape[1] == 1: # Binary classification
38+
probabilities = probabilities.squeeze()
39+
labels = (probabilities >= classification_threshold).astype(np.float32)
40+
else: # Multiclass classification
41+
labels = probabilities.argmax(axis=-1).astype(np.float32)
2842
if include_probabilities:
2943
return labels, probabilities
3044
else:
3145
return labels
3246
elif isinstance(model, BaseEstimator):
33-
labels = model.predict(data)
47+
if not is_classifier(model):
48+
raise InvalidModelTypeException(f"Expected a classifier model: {type(model)}.")
49+
probabilities = model.predict_proba(data).astype(np.float32)
50+
if probabilities.shape[1] == 2: # Binary classification
51+
probabilities = probabilities[:, 1]
52+
labels = (probabilities >= classification_threshold).astype(np.float32)
53+
else: # Multiclass classification
54+
labels = probabilities.argmax(axis=-1).astype(np.float32)
3455
if include_probabilities:
35-
probabilities = model.predict_proba(data)
3656
return labels, probabilities
3757
else:
3858
return labels
59+
else:
60+
raise InvalidModelTypeException(f"Model type not recognized: {type(model)}.")
3961

4062

4163
@beartype
@@ -44,7 +66,7 @@ def predict_regressor(
4466
model: Union[BaseEstimator, keras.Model],
4567
) -> np.ndarray:
4668
"""
47-
Predict with a trained model.
69+
Predict with a trained regressor model.
4870
4971
Args:
5072
data: Data used to make predictions.
@@ -53,6 +75,11 @@ def predict_regressor(
5375
5476
Returns:
5577
Regression model prediction array.
78+
79+
Raises:
80+
InvalidModelTypeException: Input model is not a regressor model.
5681
"""
82+
if is_classifier(model):
83+
raise InvalidModelTypeException(f"Expected a regressor model: {type(model)}.")
5784
result = model.predict(data)
5885
return result

tests/prediction/machine_learning_general_test.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def test_evaluate_model_sklearn():
118118
X_train, y_train, model=RF_MODEL, validation_method="none", metrics=CLF_METRICS, random_state=42
119119
)
120120

121-
predictions = predict_classifier(X_test, model, include_probabilities=False)
121+
predictions = predict_classifier(X_test, model, classification_threshold=0.5, include_probabilities=False)
122122
accuracy = score_predictions(y_test, predictions, "accuracy")
123123
np.testing.assert_equal(accuracy, 1.0)
124124

@@ -131,7 +131,7 @@ def test_predict_classifier_sklearn():
131131
X_train, y_train, model=RF_MODEL, validation_method="none", metrics=CLF_METRICS, random_state=42
132132
)
133133

134-
predicted_labels, predicted_probabilities = predict_classifier(X_test, model, True)
134+
predicted_labels, predicted_probabilities = predict_classifier(X_test, model, include_probabilities=True)
135135
np.testing.assert_equal(len(predicted_labels), len(y_test))
136136
np.testing.assert_equal(len(predicted_probabilities), len(y_test))
137137

0 commit comments

Comments
 (0)