Commit 2390407

Merge pull request #411 from GispoCoding/406-reduce-number-of-decimals-in-numeric-outputs
Reduce number of decimals in outputs
2 parents 813362c + f7a6269 commit 2390407

7 files changed, +49 -28 lines changed

eis_toolkit/cli.py

+6 -5

@@ -2375,7 +2375,7 @@ def classifier_test_cli(
         predictions, reference_profile["height"], reference_profile["width"], nodata_mask
     )

-    metrics_dict = score_predictions(y, predictions, get_enum_values(test_metrics))
+    metrics_dict = score_predictions(y, predictions, get_enum_values(test_metrics), decimals=3)
     typer.echo("Progress: 80%")

     out_profile = reference_profile.copy()
@@ -2421,7 +2421,7 @@ def regressor_test_cli(
         predictions, reference_profile["height"], reference_profile["width"], nodata_mask
     )

-    metrics_dict = score_predictions(y, predictions, get_enum_values(test_metrics))
+    metrics_dict = score_predictions(y, predictions, get_enum_values(test_metrics), decimals=3)
     typer.echo("Progress: 80%")

     out_profile = reference_profile.copy()
@@ -3109,7 +3109,7 @@ def summarize_probability_metrics_cli(true_labels: INPUT_FILE_OPTION, probabilit
     (y_prob, y_true), _, _ = read_data_for_evaluation([probabilities, true_labels])
     typer.echo("Progress: 25%")

-    results_dict = summarize_probability_metrics(y_true=y_true, y_prob=y_prob)
+    results_dict = summarize_probability_metrics(y_true=y_true, y_prob=y_prob, decimals=3)

     typer.echo("Progress: 75%")

@@ -3135,7 +3135,7 @@ def summarize_label_metrics_binary_cli(true_labels: INPUT_FILE_OPTION, predictio
     (y_pred, y_true), _, _ = read_data_for_evaluation([predictions, true_labels])
     typer.echo("Progress: 25%")

-    results_dict = summarize_label_metrics_binary(y_true=y_true, y_pred=y_pred)
+    results_dict = summarize_label_metrics_binary(y_true=y_true, y_pred=y_pred, decimals=3)
     typer.echo("Progress: 75%")

     typer.echo("Progress: 100% \n")
@@ -3340,6 +3340,7 @@ def score_predictions_cli(
     true_labels: INPUT_FILE_OPTION,
     predictions: INPUT_FILE_OPTION,
     metrics: Annotated[List[str], typer.Option()],
+    decimals: Optional[int] = None,
 ):
     """Score predictions."""
     from eis_toolkit.evaluation.scoring import score_predictions
@@ -3350,7 +3351,7 @@ def score_predictions_cli(
     (y_pred, y_true), _, _ = read_data_for_evaluation([predictions, true_labels])
     typer.echo("Progress: 25%")

-    outputs = score_predictions(y_true, y_pred, metrics)
+    outputs = score_predictions(y_true, y_pred, metrics, decimals)
     typer.echo("Progress: 100% \n")

     typer.echo(f"Results: {str(outputs)}")

eis_toolkit/evaluation/classification_label_evaluation.py

+13 -6

@@ -1,11 +1,15 @@
 from numbers import Number
-from typing import Dict

 import numpy as np
+from beartype.typing import Dict, Optional
 from sklearn.metrics import accuracy_score, confusion_matrix, precision_recall_fscore_support


-def summarize_label_metrics_binary(y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, Number]:
+def summarize_label_metrics_binary(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    decimals: Optional[int] = None,
+) -> Dict[str, Number]:
     """
     Generate a comprehensive report of various evaluation metrics for binary classification results.

@@ -15,18 +19,21 @@ def summarize_label_metrics_binary(y_true: np.ndarray, y_pred: np.ndarray) -> Di
     Args:
         y_true: True labels.
         y_pred: Predicted labels. The array should come from a binary classifier.
+        decimals: Number of decimals used in rounding the scores. If None, scores are not rounded.
+            Defaults to None.

     Returns:
         A dictionary containing the evaluated metrics.
     """
     metrics = {}

-    metrics["Accuracy"] = accuracy_score(y_true, y_pred)
+    accuracy = accuracy_score(y_true, y_pred)
+    metrics["Accuracy"] = round(accuracy, decimals) if decimals is not None else accuracy

     precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary")
-    metrics["Precision"] = precision
-    metrics["Recall"] = recall
-    metrics["F1_score"] = f1
+    metrics["Precision"] = round(precision, decimals) if decimals is not None else precision
+    metrics["Recall"] = round(recall, decimals) if decimals is not None else recall
+    metrics["F1_score"] = round(f1, decimals) if decimals is not None else f1

     tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
     metrics["True_negatives"] = tn

eis_toolkit/evaluation/classification_probability_evaluation.py

+11 -4

@@ -1,9 +1,7 @@
-from typing import Dict
-
 import matplotlib.pyplot as plt
 import numpy as np
 import seaborn as sns
-from beartype.typing import Optional
+from beartype.typing import Dict, Optional
 from sklearn.calibration import CalibrationDisplay
 from sklearn.metrics import (
     DetCurveDisplay,
@@ -16,7 +14,11 @@
 )


-def summarize_probability_metrics(y_true: np.ndarray, y_prob: np.ndarray) -> Dict[str, float]:
+def summarize_probability_metrics(
+    y_true: np.ndarray,
+    y_prob: np.ndarray,
+    decimals: Optional[int] = None,
+) -> Dict[str, float]:
     """
     Generate a comprehensive report of various evaluation metrics for classification probabilities.

@@ -26,6 +28,8 @@ def summarize_probability_metrics(y_true: np.ndarray, y_prob: np.ndarray) -> Dic
         y_true: True labels.
         y_prob: Predicted probabilities for the positive class. The array should come from
             a binary classifier.
+        decimals: Number of decimals used in rounding the scores. If None, scores are not rounded.
+            Defaults to None.

     Returns:
         A dictionary containing the evaluated metrics.
@@ -37,6 +41,9 @@ def summarize_probability_metrics(y_true: np.ndarray, y_prob: np.ndarray) -> Dic
     metrics["average_precision"] = average_precision_score(y_true, y_prob)
     metrics["brier_score_loss"] = brier_score_loss(y_true, y_prob)

+    for key, value in metrics.items():
+        metrics[key] = round(value, decimals) if decimals is not None else value
+
     return metrics


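Here the rounding is applied in one pass over the finished dictionary, so every probability metric is rounded consistently and nothing changes when `decimals` is left at `None`. An illustrative call with toy data; metric keys beyond the two visible in this hunk are not assumed:

    # Toy example; only the behaviour of the decimals argument is illustrated.
    import numpy as np

    from eis_toolkit.evaluation.classification_probability_evaluation import summarize_probability_metrics

    y_true = np.array([0, 0, 1, 1, 0, 1])
    y_prob = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.7])

    rounded = summarize_probability_metrics(y_true, y_prob, decimals=3)  # every value rounded to 3 decimals
    unrounded = summarize_probability_metrics(y_true, y_prob)            # decimals=None, values left as computed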
eis_toolkit/evaluation/scoring.py

+9 -3

@@ -1,4 +1,5 @@
 from numbers import Number
+from typing import Optional

 import numpy as np
 import pandas as pd
@@ -19,7 +20,10 @@

 @beartype
 def score_predictions(
-    y_true: Union[np.ndarray, pd.Series], y_pred: Union[np.ndarray, pd.Series], metrics: Union[str, Sequence[str]]
+    y_true: Union[np.ndarray, pd.Series],
+    y_pred: Union[np.ndarray, pd.Series],
+    metrics: Union[str, Sequence[str]],
+    decimals: Optional[int] = None,
 ) -> Union[Number, Dict[str, Number]]:
     """
     Score model predictions with given metrics.
@@ -34,18 +38,20 @@ def score_predictions(
         y_pred: Predicted labels.
         metrics: The metrics to use for scoring the model. Select only metrics applicable
             for the model type.
+        decimals: Number of decimals used in rounding the scores. If None, scores are not rounded.
+            Defaults to None.

     Returns:
         Metric scores as a dictionary if multiple metrics, otherwise just the metric value.
     """
     if isinstance(metrics, str):
         score = _score_predictions(y_true, y_pred, metrics)
-        return score
+        return round(score, decimals) if decimals is not None else score
     else:
         out_metrics = {}
         for metric in metrics:
             score = _score_predictions(y_true, y_pred, metric)
-            out_metrics[metric] = score
+            out_metrics[metric] = round(score, decimals) if decimals is not None else score
         return out_metrics

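`score_predictions` keeps its two return shapes: a bare number for a single metric name and a dictionary for a sequence of names, with rounding applied in either case only when `decimals` is given. A short sketch on made-up arrays, using metric names that appear in the regression tests below:

    # Sketch of both return shapes; "mae" and "rmse" are taken from the tests below.
    import numpy as np

    from eis_toolkit.evaluation.scoring import score_predictions

    y_true = np.array([1.0, 2.0, 3.0, 4.0])
    y_pred = np.array([1.1, 1.9, 3.2, 3.7])

    single = score_predictions(y_true, y_pred, "mae", decimals=3)             # one rounded number
    several = score_predictions(y_true, y_pred, ["mae", "rmse"], decimals=3)  # dict of rounded numbers
    raw = score_predictions(y_true, y_pred, "mae")                            # decimals=None, not rounded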
eis_toolkit/prediction/machine_learning_general.py

+2 -2

@@ -350,7 +350,7 @@ def _train_and_validate_sklearn_model(

         out_metrics = {}
         for metric in metrics:
-            score = score_predictions(y_valid, y_pred, metric)
+            score = score_predictions(y_valid, y_pred, metric, decimals=3)
             out_metrics[metric] = score

     # Validation approach 3: Cross-validation
@@ -369,7 +369,7 @@ def _train_and_validate_sklearn_model(
             y_pred = model.predict(X[valid_index])

             for metric in metrics:
-                score = score_predictions(y[valid_index], y_pred, metric)
+                score = score_predictions(y[valid_index], y_pred, metric, decimals=3)
                 all_scores = out_metrics[metric][f"{metric}_all"]
                 all_scores.append(score)

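In the validation paths the precision is fixed at three decimals rather than exposed to the caller, so every per-fold score is already rounded before it is collected. A generic sketch of the arithmetic consequence, not the toolkit's internal code: the mean of rounded fold scores can drift from the unrounded mean by at most 0.0005.

    # Generic illustration with invented fold scores.
    fold_scores = [0.91234, 0.88765, 0.90111]
    rounded = [round(s, 3) for s in fold_scores]  # [0.912, 0.888, 0.901]

    print(sum(rounded) / len(rounded))            # differs from the raw mean by less than 0.0005
    print(sum(fold_scores) / len(fold_scores))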
tests/prediction/gradient_boosting_test.py

+4 -4

@@ -40,10 +40,10 @@ def test_gradient_boosting_regressor():
     np.testing.assert_equal(len(predicted_labels), len(Y_IRIS))

     np.testing.assert_equal(count_false, 150)
-    np.testing.assert_almost_equal(out_metrics["mae"], 0.03101, decimal=4)
-    np.testing.assert_almost_equal(out_metrics["mse"], 0.00434, decimal=4)
-    np.testing.assert_almost_equal(out_metrics["rmse"], 0.06593, decimal=4)
-    np.testing.assert_almost_equal(out_metrics["r2"], 0.99377, decimal=4)
+    np.testing.assert_equal(out_metrics["mae"], 0.031)
+    np.testing.assert_equal(out_metrics["mse"], 0.004)
+    np.testing.assert_equal(out_metrics["rmse"], 0.066)
+    np.testing.assert_equal(out_metrics["r2"], 0.994)


 def test_invalid_learning_rate():
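Because the validation metrics now come back rounded to three decimals, the regression tests can compare with `assert_equal` against three-decimal literals instead of `assert_almost_equal`. The old approximate expectations round to exactly the new values:

    # The previous approximate expectations, rounded to 3 decimals, equal the new literals.
    assert round(0.03101, 3) == 0.031   # mae
    assert round(0.00434, 3) == 0.004   # mse
    assert round(0.06593, 3) == 0.066   # rmse
    assert round(0.99377, 3) == 0.994   # r2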
tests/prediction/random_forest_test.py

+4 -4

@@ -37,10 +37,10 @@ def test_random_forest_regressor():
     np.testing.assert_equal(len(predicted_labels), len(Y_IRIS))

     np.testing.assert_equal(count_false, 35)
-    np.testing.assert_almost_equal(out_metrics["mae"], 0.01366, decimal=4)
-    np.testing.assert_almost_equal(out_metrics["mse"], 0.00138, decimal=4)
-    np.testing.assert_almost_equal(out_metrics["rmse"], 0.03719, decimal=4)
-    np.testing.assert_almost_equal(out_metrics["r2"], 0.99802, decimal=4)
+    np.testing.assert_equal(out_metrics["mae"], 0.014)
+    np.testing.assert_equal(out_metrics["mse"], 0.001)
+    np.testing.assert_equal(out_metrics["rmse"], 0.037)
+    np.testing.assert_equal(out_metrics["r2"], 0.998)


 def test_random_forest_invalid_n_estimators():
