Skip to content

Commit 9581b74

Browse files
committed
feat(exploratory-analyses): add band parameter to descriptive statistics raster, rename CLI parameter from input_file to input_raster
1 parent 1d69439 commit 9581b74

File tree

3 files changed

+42
-17
lines changed

3 files changed

+42
-17
lines changed

eis_toolkit/cli.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -786,15 +786,15 @@ def compute_pca_vector_cli(
786786

787787
# DESCRIPTIVE STATISTICS (RASTER)
788788
@app.command()
789-
def descriptive_statistics_raster_cli(input_file: INPUT_FILE_OPTION):
789+
def descriptive_statistics_raster_cli(input_raster: INPUT_FILE_OPTION, band: int = 1):
790790
"""Generate descriptive statistics from raster data."""
791791
from eis_toolkit.exploratory_analyses.descriptive_statistics import descriptive_statistics_raster
792792

793793
typer.echo("Progress: 10%")
794794

795-
with rasterio.open(input_file) as raster:
795+
with rasterio.open(input_raster) as raster:
796796
typer.echo("Progress: 25%")
797-
results_dict = descriptive_statistics_raster(raster)
797+
results_dict = descriptive_statistics_raster(raster, band)
798798
typer.echo("Progress: 75%")
799799

800800
typer.echo("Progress: 100% \n")

eis_toolkit/exploratory_analyses/descriptive_statistics.py

+38-13
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
import pandas as pd
44
import rasterio
55
from beartype import beartype
6-
from beartype.typing import Union
6+
from beartype.typing import Dict, Union
77
from statsmodels.stats import stattools
88
from statsmodels.stats.weightstats import DescrStatsW
99

10-
from eis_toolkit.exceptions import InvalidColumnException
10+
from eis_toolkit.exceptions import InvalidColumnException, InvalidRasterBandException
1111

1212

13-
def _descriptive_statistics(data: Union[rasterio.io.DatasetReader, pd.DataFrame, gpd.GeoDataFrame]) -> dict:
13+
def _descriptive_statistics(data: Union[rasterio.io.DatasetReader, pd.DataFrame, gpd.GeoDataFrame]) -> Dict[str, float]:
1414
statistics = DescrStatsW(data)
1515
min = np.min(data)
1616
max = np.max(data)
@@ -38,14 +38,25 @@ def _descriptive_statistics(data: Union[rasterio.io.DatasetReader, pd.DataFrame,
3838

3939

4040
@beartype
41-
def descriptive_statistics_dataframe(input_data: Union[pd.DataFrame, gpd.GeoDataFrame], column: str) -> dict:
42-
"""Generate descriptive statistics from vector data.
41+
def descriptive_statistics_dataframe(
42+
input_data: Union[pd.DataFrame, gpd.GeoDataFrame], column: str
43+
) -> Dict[str, float]:
44+
"""Compute descriptive statistics from vector data.
4345
44-
Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
46+
Computes the following statistics:
47+
- min
48+
- max
49+
- mean
50+
- quantile 25%
51+
- quantile 50% (median)
52+
- quantile 75%
53+
- standard deviation
54+
- relative standard deviation
55+
- skewness
4556
4657
Args:
47-
input_data: Data to generate descriptive statistics from.
48-
column: Specify the column to generate descriptive statistics from.
58+
input_data: Input vector data.
59+
column: Column in vector data to compute descriptive statistics from.
4960
5061
Returns:
5162
The descriptive statistics in previously described order.
@@ -58,19 +69,33 @@ def descriptive_statistics_dataframe(input_data: Union[pd.DataFrame, gpd.GeoData
5869

5970

6071
@beartype
61-
def descriptive_statistics_raster(input_data: rasterio.io.DatasetReader) -> dict:
62-
"""Generate descriptive statistics from raster data.
72+
def descriptive_statistics_raster(input_data: rasterio.io.DatasetReader, band: int = 1) -> Dict[str, float]:
73+
"""Compute descriptive statistics from raster data.
74+
75+
Computes the following statistics:
76+
- min
77+
- max
78+
- mean
79+
- quantile 25%
80+
- quantile 50% (median)
81+
- quantile 75%
82+
- standard deviation
83+
- relative standard deviation
84+
- skewness
6385
64-
Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
6586
Nodata values are removed from the data before the statistics are computed.
6687
6788
Args:
68-
input_data: Data to generate descriptive statistics from.
89+
input_data: Input raster data.
90+
band: Raster band to compute descriptive statistics from.
6991
7092
Returns:
7193
The descriptive statistics in previously described order.
7294
"""
73-
data = input_data.read().flatten()
95+
if band not in range(1, input_data.count + 1):
96+
raise InvalidRasterBandException(f"Input raster does not contain the selected band: {band}.")
97+
98+
data = input_data.read(band)
7499
nodata_value = input_data.nodata
75100
data = data[data != nodata_value]
76101
statistics = _descriptive_statistics(data)

tests/exploratory_analyses/descriptive_statistics_test.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def test_descriptive_statistics_geodataframe():
6161

6262
def test_descriptive_statistics_raster():
6363
"""Checks that returned statistics are correct when using numpy.ndarray."""
64-
test = descriptive_statistics_raster(src_raster)
64+
test = descriptive_statistics_raster(src_raster, 1)
6565
np.testing.assert_almost_equal(test["min"], 2.503)
6666
np.testing.assert_almost_equal(test["max"], 9.67)
6767
np.testing.assert_almost_equal(test["mean"], 5.1865644)

0 commit comments

Comments
 (0)