Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions hydrography-approach/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,8 @@ output_files:
bridge_with_proj_points: "output-data/{{ state }}/csv-files/bridge-osm-association-with-projected-points.csv"
bridge_match_percentage: "output-data/{{ state }}/csv-files/Association-match-check-with-percentage.csv"
final_bridges_csv: "output-data/{{ state }}/csv-files/Final-bridges-with-percentage-match.csv"

logging:
log_file_path: "hydrography-pipeline.log"


Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,52 @@
from fuzzywuzzy import fuzz


# Function to calculate similarity
def calculate_osm_similarity(row: pd.Series) -> int:
    """
    Calculate the similarity between OSM name and Facility Carried By Structure.

    Args:
        row: Record holding the "osm_name" and
            "7 - Facility Carried By Structure" fields.

    Returns:
        The ``fuzz.token_sort_ratio`` score of the two fields, or 0 when
        either field is missing (None/NaN).
    """
    osm_name = row["osm_name"]
    facility = row["7 - Facility Carried By Structure"]
    # Missing values (None/NaN, e.g. a blank cell loaded by pandas) are not
    # names; report "no match" rather than handing a non-string to the matcher.
    if pd.isna(osm_name) or pd.isna(facility):
        return 0
    return fuzz.token_sort_ratio(osm_name, facility)


def calculate_nhd_similarity(row: pd.Series) -> int:
    """
    Calculate the similarity between stream name and Features Intersected.

    Args:
        row: Record holding the "stream_name" and
            "6A - Features Intersected" fields.

    Returns:
        The ``fuzz.token_sort_ratio`` score of the two fields, or 0 when
        either field is missing (None/NaN).
    """
    stream_name = row["stream_name"]
    features = row["6A - Features Intersected"]
    # Missing values (None/NaN, e.g. a blank cell loaded by pandas) are not
    # names; report "no match" rather than handing a non-string to the matcher.
    if pd.isna(stream_name) or pd.isna(features):
        return 0
    return fuzz.token_sort_ratio(stream_name, features)


def calculate_cross_similarity_1(row: pd.Series) -> int:
    """
    Calculate the similarity between OSM name and Features Intersected.

    Args:
        row: Record holding the "osm_name" and
            "6A - Features Intersected" fields.

    Returns:
        The ``fuzz.token_sort_ratio`` score of the two fields, or 0 when
        either field is missing (None/NaN).
    """
    osm_name = row["osm_name"]
    features = row["6A - Features Intersected"]
    # Missing values (None/NaN, e.g. a blank cell loaded by pandas) are not
    # names; report "no match" rather than handing a non-string to the matcher.
    if pd.isna(osm_name) or pd.isna(features):
        return 0
    return fuzz.token_sort_ratio(osm_name, features)


def calculate_cross_similarity_2(row: pd.Series) -> int:
    """
    Calculate the similarity between stream name and Facility Carried By Structure.

    Args:
        row: Record holding the "stream_name" and
            "7 - Facility Carried By Structure" fields.

    Returns:
        The ``fuzz.token_sort_ratio`` score of the two fields, or 0 when
        either field is missing (None/NaN).
    """
    stream_name = row["stream_name"]
    facility = row["7 - Facility Carried By Structure"]
    # Missing values (None/NaN, e.g. a blank cell loaded by pandas) are not
    # names; report "no match" rather than handing a non-string to the matcher.
    if pd.isna(stream_name) or pd.isna(facility):
        return 0
    return fuzz.token_sort_ratio(stream_name, facility)


def run(bridge_with_proj_points: str, bridge_match_percentage: str) -> None:
    """
    Read the CSV file, calculate similarity scores, and save the results to a CSV file.

    Args:
        bridge_with_proj_points: Path of the input CSV to read.
        bridge_match_percentage: Path the scored CSV is written to.

    Raises:
        Exception: Any error raised while reading, scoring, or writing is
            reported on stdout and then re-raised unchanged.
    """
    try:
        df = pd.read_csv(bridge_with_proj_points)

        # Score every row; each helper receives one row as a Series (axis=1).
        df["osm_similarity"] = df.apply(calculate_osm_similarity, axis=1)
        df["nhd_similarity"] = df.apply(calculate_nhd_similarity, axis=1)

        # Save the DataFrame with similarity scores (row index is not written).
        df.to_csv(bridge_match_percentage, index=False)

    except Exception as e:
        # Report the failure, then let the caller decide how to handle it.
        print(f"Error processing similarity calculations: {e}")
        raise
Loading