mapup · varun-andhra-mapup · Jul 30, 2024 · Jul 31, 2024 · Jul 31, 2024 · Aug 1, 2024
diff --git a/hydrography-approach/config.yml b/hydrography-approach/config.yml
@@ -35,4 +35,8 @@ output_files:
   bridge_with_proj_points: "output-data/{{ state }}/csv-files/bridge-osm-association-with-projected-points.csv"
   bridge_match_percentage: "output-data/{{ state }}/csv-files/Association-match-check-with-percentage.csv"
   final_bridges_csv: "output-data/{{ state }}/csv-files/Final-bridges-with-percentage-match.csv"
+
+logging:
+  log_file_path: "hydrography-pipeline.log"
+
 
diff --git a/hydrography-approach/processing_scripts/associate_data/calculate_match_percentage.py b/hydrography-approach/processing_scripts/associate_data/calculate_match_percentage.py
@@ -2,33 +2,52 @@
 from fuzzywuzzy import fuzz
 
 
-# Function to calculate similarity
-def calculate_osm_similarity(row):
+def calculate_osm_similarity(row: pd.Series) -> int:
+    """
+    Return the token-sort fuzzy ratio of "osm_name" vs. "7 - Facility Carried By Structure".
+    """
     return fuzz.token_sort_ratio(
         row["osm_name"], row["7 - Facility Carried By Structure"]
     )
 
 
-def calculate_nhd_similarity(row):
+def calculate_nhd_similarity(row: pd.Series) -> int:
+    """
+    Return the token-sort fuzzy ratio of "stream_name" vs. "6A - Features Intersected".
+    """
     return fuzz.token_sort_ratio(row["stream_name"], row["6A - Features Intersected"])
 
 
-def calculate_cross_similarity_1(row):
+def calculate_cross_similarity_1(row: pd.Series) -> int:
+    """
+    Return the token-sort fuzzy ratio of "osm_name" vs. "6A - Features Intersected".
+    """
     return fuzz.token_sort_ratio(row["osm_name"], row["6A - Features Intersected"])
 
 
-def calculate_cross_similarity_2(row):
+def calculate_cross_similarity_2(row: pd.Series) -> int:
+    """
+    Return the token-sort fuzzy ratio of "stream_name" vs. "7 - Facility Carried By Structure".
+    """
     return fuzz.token_sort_ratio(
         row["stream_name"], row["7 - Facility Carried By Structure"]
     )
 
 
-def run(bridge_with_proj_points, bridge_match_percentage):
-    df = pd.read_csv(bridge_with_proj_points)
+def run(bridge_with_proj_points: str, bridge_match_percentage: str) -> None:
+    """
+    Read the input CSV, add "osm_similarity" and "nhd_similarity" columns, and write the result to the output CSV.
+    """
+    try:
+        df = pd.read_csv(bridge_with_proj_points)
 
-    # Apply the function row-wise
-    df["osm_similarity"] = df.apply(calculate_osm_similarity, axis=1)
-    df["nhd_similarity"] = df.apply(calculate_nhd_similarity, axis=1)
+        # Apply the function row-wise
+        df["osm_similarity"] = df.apply(calculate_osm_similarity, axis=1)
+        df["nhd_similarity"] = df.apply(calculate_nhd_similarity, axis=1)
 
-    # Save the DataFrame with similarity scores
-    df.to_csv(bridge_match_percentage, index=False)
+        # Save the DataFrame with similarity scores
+        df.to_csv(bridge_match_percentage, index=False)
+
+    except Exception as e:
+        print(f"Error processing similarity calculations: {e}")
+        raise