Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/webapp/databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from google.api_core import exceptions as gcs_errors
from .validation_extension import generate_extension_schema
from .config import databricks_vars, gcs_vars
from .utilities import databricksify_inst_name, SchemaType
from .utilities import SchemaType
from edvise.utils.databricks import databricksify_inst_name
from typing import List, Any, Dict, Optional
from fastapi import HTTPException
import requests
Expand Down
2 changes: 1 addition & 1 deletion src/webapp/routers/front_end_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
BaseUser,
str_to_uuid,
get_current_active_user,
databricksify_inst_name,
)
from edvise.utils.databricks import databricksify_inst_name

from ..database import (
get_session,
Expand Down
31 changes: 0 additions & 31 deletions src/webapp/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,34 +422,3 @@ def get_external_bucket_name_from_uuid(inst_id: uuid.UUID) -> Any:
def get_external_bucket_name(inst_id: str) -> Any:
    """Build the env-prefixed GCP bucket name for an institution whose uuid is given as a str."""
    bucket_name = prepend_env_prefix(inst_id)
    return bucket_name


def databricksify_inst_name(inst_name: str) -> str:
    """
    Follow DK standardized rules for naming conventions used in Databricks.

    The name is lowercased, known institution phrases are abbreviated,
    ampersands and hyphens are turned into spaces, and finally every space
    is turned into an underscore.

    Args:
        inst_name: Human-readable institution name.

    Returns:
        The converted name, made up only of ``a-z``, ``0-9`` and ``_``.

    Raises:
        ValueError: If any other character (including a trailing newline)
            remains after the replacements.
    """
    name = inst_name.lower()
    # Order matters: a phrase that contains another key (e.g. "community
    # college" contains "college") must be replaced before the shorter key.
    # A tuple of pairs makes that ordering explicit.
    dk_replacements = (
        ("community technical college", "ctc"),
        ("community college", "cc"),
        ("of science and technology", "st"),
        ("university", "uni"),
        ("college", "col"),
    )
    for old, new in dk_replacements:
        name = name.replace(old, new)

    # " & " goes first so a spaced ampersand collapses to a single space
    # instead of leaving three.
    special_char_replacements = ((" & ", " "), ("&", " "), ("-", " "))
    for old, new in special_char_replacements:
        name = name.replace(old, new)

    # Databricks uses underscores, so we'll do that here.
    final_name = name.replace(" ", "_")

    # Check to see that no special characters exist. fullmatch is used rather
    # than match with "^...$" because "$" also matches just before a trailing
    # newline, which would let a name ending in "\n" slip through.
    if not re.fullmatch(r"[a-z0-9_]*", final_name):
        raise ValueError("Unexpected character found in Databricks compatible name.")
    return final_name
32 changes: 0 additions & 32 deletions src/webapp/utilities_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
has_access_to_inst_or_err,
has_full_data_access_or_err,
uuid_to_str,
databricksify_inst_name,
)

from .test_helper import USR, DATAKINDER, VIEWER, UUID_INVALID, USER_VALID_INST_UUID
Expand Down Expand Up @@ -41,34 +40,3 @@ def test_has_full_data_access_or_err():
has_full_data_access_or_err(VIEWER, "models")
assert err.value.status_code == 401
assert err.value.detail == "Not authorized to view models for this institution."


def test_databricksify_inst_name():
    """
    Check databricksify_inst_name on known institution names and on a name
    containing characters that must be rejected.
    """
    expected_by_name = {
        "Motlow State Community College": "motlow_state_cc",
        "Metro State University Denver": "metro_state_uni_denver",
        "Kentucky State University": "kentucky_state_uni",
        "Central Arizona College": "central_arizona_col",
        "Harrisburg University of Science and Technology": "harrisburg_uni_st",
        "Southeast Kentucky community technical college": "southeast_kentucky_ctc",
        "Northwest State Community College": "northwest_state_cc",
    }
    for raw_name, expected in expected_by_name.items():
        assert databricksify_inst_name(raw_name) == expected

    # Parentheses are not part of the allowed character set.
    with pytest.raises(ValueError) as err:
        databricksify_inst_name("Northwest (invalid)")
    assert str(err.value) == "Unexpected character found in Databricks compatible name."
Loading