diff --git a/src/geogenalg/application/generalize_fences.py b/src/geogenalg/application/generalize_fences.py index 8b33083..1f7a68f 100644 --- a/src/geogenalg/application/generalize_fences.py +++ b/src/geogenalg/application/generalize_fences.py @@ -6,160 +6,171 @@ # LICENSE file in the root directory of this source tree. from dataclasses import dataclass -from pathlib import Path +from typing import override -import geopandas as gpd -import pandas as pd from cartagen.utils.partitioning.network import network_faces +from geopandas import GeoDataFrame +from pandas import concat from geogenalg import continuity, merge, selection +from geogenalg.application import BaseAlgorithm +from geogenalg.core.exceptions import GeometryTypeError +from geogenalg.utility.validation import check_gdf_geometry_type -@dataclass -class AlgorithmOptions: - """Options for generalize fences algorithm. +@dataclass(frozen=True) +class GeneralizeFences(BaseAlgorithm): + """Generalize lines representing fences. - Attributes: - closing_fence_area_threshold: Minimum area for a fence-enclosed region - closing_fence_area_with_mast_threshold: Minimum area for a fence-enclosed - region containing a mast - fence_length_threshold: Minimum length for a fence line - fence_length_threshold_in_closed_area: Minimum length for a fence line within a - closed area - simplification_tolerance: Tolerance used for geometry simplification - gap_threshold: Maximum gap between two fence lines to be connected with - a helper line - attribute_for_line_merge: Name of the attribute to determine which line - features can be merged + Reference data should contain a Point GeoDataFrame with the key + "masts". - """ - - closing_fence_area_threshold: float - closing_fence_area_with_mast_threshold: float - fence_length_threshold: float - fence_length_threshold_in_closed_area: float - simplification_tolerance: float - gap_threshold: float - attribute_for_line_merge: str - - -def create_generalized_fences( - input_path: Path | str, - fences_layer_name: str, - masts_layer_name: str, - options: AlgorithmOptions, - output_path: str, -) -> None: - """Create GeoDataFrames and pass them to the generalization function. - - Args: - ---- - input_path: Path to the input GeoPackage - fences_layer_name: Name of the layer for fences - masts_layer_name: Name of the layer for masts - options: Algorithm parameters for generalize fences - output_path: Path to save the output GeoPackage - - Raises: - ------ - FileNotFoundError: If the input_path does not exist + Output contains the generalized line fences. + The algorithm does the following steps: + - Merges line segments + - Adds helper lines to close small gaps between lines + - Removes short lines within large enough enclosed areas + - Removes surrounding lines of small enough enclosed areas + - Removes close enough lines surrounding a mast + - Removes all short lines + - Simplifies all lines """ - if not Path(input_path).resolve().exists(): - raise FileNotFoundError - - fences_gdf = gpd.read_file(input_path, layer=fences_layer_name) - - masts_gdf = gpd.read_file(input_path, layer=masts_layer_name) - - result = generalize_fences( - fences_gdf, - masts_gdf, - options, - ) - - result.to_file(output_path, driver="GPKG") - -def generalize_fences( - fences_gdf: gpd.GeoDataFrame, masts_gdf: gpd.GeoDataFrame, options: AlgorithmOptions -) -> gpd.GeoDataFrame: - """Generalize the LineString layer representing fences. - - Args: - ---- - fences_gdf: A GeoDataFrame containing the fence lines to be generalized - masts_gdf: A GeoDataFrame containing masts - options: Algorithm parameters for generalize fences - - Returns: - ------- - Generalized fence lines - - """ - result_gdf = fences_gdf.copy() - - # Merge connecting lines with the same attribute value - result_gdf = merge.merge_connecting_lines_by_attribute( - result_gdf, options.attribute_for_line_merge - ) - - # Generate helper lines to close small gaps - helper_lines_gdf = continuity.connect_nearby_endpoints( - result_gdf, options.gap_threshold - ) - - # Combine original fence lines with helper lines - combined_gdf: gpd.GeoDataFrame - combined_gdf = pd.concat([result_gdf, helper_lines_gdf], ignore_index=True) - - # Calculate the CartaGen network faces to fill closing line geometries - faces = network_faces(list(combined_gdf.geometry), convex_hull=False) - faces_gdf = gpd.GeoDataFrame(geometry=list(faces)) - - # Dissolve adjacent polygons into larger contiguous areas - faces_gdf = faces_gdf.dissolve(as_index=False) - faces_gdf = faces_gdf.union_all() - - # Select fence lines shorter than the threshold for enclosed areas - short_lines = result_gdf[ - result_gdf.geometry.length < options.fence_length_threshold_in_closed_area - ] - - # Remove short fence lines that are within closed polygonal areas - to_remove_idx = short_lines[short_lines.geometry.apply(faces_gdf.contains)].index - result_gdf = result_gdf.drop(index=to_remove_idx) - - # Convert MultiPolygon result back into a proper GeoDataFrame - polygons = list(faces_gdf.geoms) - faces_gdf = gpd.GeoDataFrame(geometry=polygons, crs=fences_gdf.crs) - - # Remove polygons whose area exceeds the closing_fence_area_with_mast_threshold and - # the closing_fence_area_threshold - polygon_gdf_with_point, polygon_gdf_without_point = ( - selection.split_polygons_by_point_intersection(faces_gdf, masts_gdf) - ) - polygon_gdf_with_point = selection.remove_large_polygons( - polygon_gdf_with_point, options.closing_fence_area_with_mast_threshold - ) - polygon_gdf_without_point = selection.remove_large_polygons( - polygon_gdf_without_point, options.closing_fence_area_threshold - ) - - # Combine filtered polygons - faces_gdf = pd.concat( - [polygon_gdf_with_point, polygon_gdf_without_point], ignore_index=True - ) - - # Remove the surrounding fence lines of small closed areas with masts considered - result_gdf = selection.remove_parts_of_lines_on_polygon_edges(result_gdf, faces_gdf) - - # Remove short fence lines - result_gdf = selection.remove_disconnected_short_lines( - result_gdf, options.fence_length_threshold - ) - - # Return simplified fence lines - result_gdf.geometry = result_gdf.geometry.simplify(options.simplification_tolerance) - - return result_gdf + closing_fence_area_threshold: float = 2000.0 + """Minimum area for a fence-enclosed region.""" + closing_fence_area_with_mast_threshold: float = 8000.0 + """Minimum area for a fence-enclosed region containing a mast.""" + fence_length_threshold: float = 80.0 + """Minimum length for a fence line.""" + fence_length_threshold_in_closed_area: float = 300.0 + """Minimum length for a fence line within a closed area.""" + simplification_tolerance: float = 4.0 + """Tolerance used for geometry simplification.""" + gap_threshold: float = 25.0 + """Maximum gap between two fence lines to be connected with a helper line.""" + attribute_for_line_merge: str = "kohdeluokka" + """Name of the attribute to determine which line features can be merged.""" + + @override + def _execute( + self, + data: GeoDataFrame, + reference_data: dict[str, GeoDataFrame], + ) -> GeoDataFrame: + """Execute algorithm. + + Args: + ---- + data: A GeoDataFrame containing the fence lines to be generalized. + reference_data: Should contain a Point GeoDataFrame with the key + "masts". + + Returns: + ------- + GeoDataFrame containing the generalized fence lines. + + Raises: + ------ + GeometryTypeError: If `data` contains non-line geometries or the + GeoDataFrame with key "masts" in `reference_data` contains + non-point geometries. + KeyError: If `reference_data` does not contain data with key + "masts" or input data does not have specified + `attribute_for_line_merge`. + + """ + if not check_gdf_geometry_type(data, ["LineString"]): + msg = "GeneralizeFences works only with LineString geometries." + raise GeometryTypeError(msg) + if "masts" not in reference_data: + msg = ( + "GeneralizeFences requires mast Point GeoDataFrame" + + " in reference_data with key 'masts'." + ) + raise KeyError(msg) + if not check_gdf_geometry_type(reference_data["masts"], ["Point"]): + msg = "Masts data should be a Point GeoDataFrame." + raise GeometryTypeError(msg) + if self.attribute_for_line_merge not in data.columns: + msg = ( + "Specified `attribute_for_line_merge` " + + f"({self.attribute_for_line_merge}) not found in input GeoDataFrame." + ) + raise KeyError(msg) + + result_gdf = data.copy() + + # Merge connecting lines with the same attribute value + result_gdf = merge.merge_connecting_lines_by_attribute( + result_gdf, self.attribute_for_line_merge + ) + + # Generate helper lines to close small gaps + helper_lines_gdf = continuity.connect_nearby_endpoints( + result_gdf, self.gap_threshold + ) + + # Combine original fence lines with helper lines + combined_gdf: GeoDataFrame = concat( + [result_gdf, helper_lines_gdf], ignore_index=True + ) + + # Calculate the CartaGen network faces to fill closing line geometries + faces = network_faces(list(combined_gdf.geometry), convex_hull=False) + faces_gdf = GeoDataFrame(geometry=list(faces)) + + # Dissolve adjacent polygons into larger contiguous areas + faces_gdf = faces_gdf.dissolve(as_index=False) + faces_gdf = faces_gdf.union_all() + + # Select fence lines shorter than the threshold for enclosed areas + short_lines = result_gdf[ + result_gdf.geometry.length < self.fence_length_threshold_in_closed_area + ] + + # Remove short fence lines that are within closed polygonal areas + to_remove_idx = short_lines[ + short_lines.geometry.apply(faces_gdf.contains) + ].index + result_gdf = result_gdf.drop(index=to_remove_idx) + + # Convert MultiPolygon result back into a proper GeoDataFrame + polygons = list(faces_gdf.geoms) + faces_gdf = GeoDataFrame(geometry=polygons, crs=data.crs) + + # Remove polygons whose area exceeds the closing_fence_area_with_mast_threshold + # and the closing_fence_area_threshold + polygon_gdf_with_point, polygon_gdf_without_point = ( + selection.split_polygons_by_point_intersection( + faces_gdf, reference_data["masts"] + ) + ) + polygon_gdf_with_point = selection.remove_large_polygons( + polygon_gdf_with_point, self.closing_fence_area_with_mast_threshold + ) + polygon_gdf_without_point = selection.remove_large_polygons( + polygon_gdf_without_point, self.closing_fence_area_threshold + ) + + # Combine filtered polygons + faces_gdf = concat( + [polygon_gdf_with_point, polygon_gdf_without_point], ignore_index=True + ) + + # Remove the surrounding fence lines of small closed areas with masts considered + result_gdf = selection.remove_parts_of_lines_on_polygon_edges( + result_gdf, faces_gdf + ) + + # Remove short fence lines + result_gdf = selection.remove_disconnected_short_lines( + result_gdf, self.fence_length_threshold + ) + + # Return simplified fence lines + result_gdf.geometry = result_gdf.geometry.simplify( + self.simplification_tolerance + ) + + return result_gdf diff --git a/src/geogenalg/main.py b/src/geogenalg/main.py index 2949f33..1ec75e0 100644 --- a/src/geogenalg/main.py +++ b/src/geogenalg/main.py @@ -23,6 +23,7 @@ from geogenalg.application.generalize_clusters_to_centroids import ( GeneralizePointClustersAndPolygonsToCentroids, ) +from geogenalg.application.generalize_fences import GeneralizeFences from geogenalg.utility.dataframe_processing import read_gdf_from_file_and_set_index GEOPACKAGE_URI_HELP = ( @@ -270,6 +271,7 @@ def build_app() -> None: """ commands_and_algs = { "clusters_to_centroids": GeneralizePointClustersAndPolygonsToCentroids, + "fences": GeneralizeFences, } for cli_command_name, alg in commands_and_algs.items(): diff --git a/test/application/test_generalize_fences.py b/test/application/test_generalize_fences.py index 35370e2..cf1fe77 100644 --- a/test/application/test_generalize_fences.py +++ b/test/application/test_generalize_fences.py @@ -5,30 +5,65 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -import tempfile from pathlib import Path +from tempfile import TemporaryDirectory -import geopandas as gpd -import geopandas.testing +import pytest +from geopandas import GeoDataFrame, read_file +from geopandas.testing import assert_geodataframe_equal +from shapely import LineString, Point -from geogenalg.application.generalize_fences import AlgorithmOptions, generalize_fences +from geogenalg.application.generalize_fences import GeneralizeFences +from geogenalg.core.exceptions import GeometryTypeError +from geogenalg.utility.dataframe_processing import read_gdf_from_file_and_set_index +UNIQUE_ID_COLUMN = "mtk_id" -def test_generalize_fences_50k( - testdata_path: Path, -) -> None: - """ - Test generalizing fences with parameters to the 1: 50 000 scale - """ - source_path = testdata_path / "fences_rovaniemi.gpkg" - temp_dir = tempfile.TemporaryDirectory() +def test_generalize_fences_50k(testdata_path: Path) -> None: + """Test generalizing fences with parameters to the 1: 50 000 scale.""" + input_path = testdata_path / "fences_rovaniemi.gpkg" + input_data = read_gdf_from_file_and_set_index( + input_path, + UNIQUE_ID_COLUMN, + layer="mtk_fences", + ) + + masts_data = read_file(input_path, layer="mtk_masts") + + temp_dir = TemporaryDirectory() output_path = temp_dir.name + "/generalized_fences.gpkg" - fences_gdf = gpd.read_file(source_path, layer="mtk_fences") - masts_gdf = gpd.read_file(source_path, layer="mtk_masts") + algorithm = GeneralizeFences( + closing_fence_area_threshold=2000, + closing_fence_area_with_mast_threshold=8000, + fence_length_threshold=80, + fence_length_threshold_in_closed_area=300, + simplification_tolerance=4, + gap_threshold=25, + attribute_for_line_merge="kohdeluokka", + ) + + control = read_gdf_from_file_and_set_index( + input_path, + UNIQUE_ID_COLUMN, + layer="generalized_fences", + ) + algorithm.execute(input_data, reference_data={"masts": masts_data}).to_file( + output_path, layer="result" + ) + + result = read_gdf_from_file_and_set_index(output_path, "index", layer="result") - options = AlgorithmOptions( + control = control.sort_values("geometry").reset_index(drop=True) + result = result.sort_values("geometry").reset_index(drop=True) + assert_geodataframe_equal(control, result) + + +def test_generalize_fences_50k_invalid_geometry_type() -> None: + input_data = GeoDataFrame({"id": [1]}, geometry=[Point(0, 0)]) + + algorithm = GeneralizeFences( closing_fence_area_threshold=2000, closing_fence_area_with_mast_threshold=8000, fence_length_threshold=80, @@ -38,21 +73,60 @@ def test_generalize_fences_50k( attribute_for_line_merge="kohdeluokka", ) - result = generalize_fences(fences_gdf, masts_gdf, options) + with pytest.raises( + GeometryTypeError, + match=r"GeneralizeFences works only with LineString geometries.", + ): + algorithm.execute(data=input_data, reference_data={}) - assert result is not None - result.to_file(output_path, layer="fences_50k") +def test_generalize_fences_50k_missing_masts_data(testdata_path: Path) -> None: + input_path = testdata_path / "fences_rovaniemi.gpkg" + input_data = read_gdf_from_file_and_set_index( + input_path, + UNIQUE_ID_COLUMN, + layer="mtk_fences", + ) - control_fences: gpd.GeoDataFrame = gpd.read_file( - source_path, layer="generalized_fences" + algorithm = GeneralizeFences( + closing_fence_area_threshold=2000, + closing_fence_area_with_mast_threshold=8000, + fence_length_threshold=80, + fence_length_threshold_in_closed_area=300, + simplification_tolerance=4, + gap_threshold=25, + attribute_for_line_merge="kohdeluokka", ) - result_fences = gpd.read_file(output_path, layer="fences_50k") + with pytest.raises( + KeyError, + match=r"GeneralizeFences requires mast Point GeoDataFrame in reference_data with key 'masts'.", + ): + algorithm.execute(data=input_data, reference_data={}) - control_fences = control_fences.sort_values("geometry").reset_index(drop=True) - result_fences = result_fences.sort_values("geometry").reset_index(drop=True) - geopandas.testing.assert_geodataframe_equal( - control_fences, result_fences, check_index_type=False +def test_generalize_fences_50k_invalid_geometry_type_masts(testdata_path: Path) -> None: + input_path = testdata_path / "fences_rovaniemi.gpkg" + input_data = read_gdf_from_file_and_set_index( + input_path, + UNIQUE_ID_COLUMN, + layer="mtk_fences", + ) + masts_data = GeoDataFrame( + {"id": [1]}, geometry=[LineString((Point(0, 0), Point(1, 0)))] + ) + + algorithm = GeneralizeFences( + closing_fence_area_threshold=2000, + closing_fence_area_with_mast_threshold=8000, + fence_length_threshold=80, + fence_length_threshold_in_closed_area=300, + simplification_tolerance=4, + gap_threshold=25, + attribute_for_line_merge="kohdeluokka", ) + + with pytest.raises( + GeometryTypeError, match=r"Masts data should be a Point GeoDataFrame." + ): + algorithm.execute(data=input_data, reference_data={"masts": masts_data})