[AQUA] Adding ADS support for embedding models in Multi Model Deployment #1163


Merged Apr 25, 2025 · 13 commits · Changes shown from 4 commits
4 changes: 4 additions & 0 deletions ads/aqua/common/entities.py
@@ -151,6 +151,9 @@ class AquaMultiModelRef(Serializable):
The name of the model.
gpu_count : Optional[int]
Number of GPUs required for deployment.
model_task : Optional[str]
The task that the model performs.
If specified, overrides the default completion/chat inference endpoints with the embedding endpoint.
env_var : Optional[Dict[str, Any]]
Optional environment variables to override during deployment.
artifact_location : Optional[str]
@@ -162,6 +165,7 @@ class AquaMultiModelRef(Serializable):
gpu_count: Optional[int] = Field(
None, description="The gpu count allocation for the model."
)
model_task: Optional[str] = Field(
None, description="The task that the model performs."
)
env_var: Optional[dict] = Field(
default_factory=dict, description="The environment variables of the model."
)
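For context, a minimal usage sketch of the new field (not part of this diff; the model IDs, names, and artifact locations below are illustrative placeholders). An entry that sets model_task="embedding" is routed to the embedding endpoint, while entries that leave it unset keep the default completion/chat endpoints:

from ads.aqua.common.entities import AquaMultiModelRef

# Illustrative only: IDs, names, and artifact locations are placeholders.
embedding_model = AquaMultiModelRef(
    model_id="ocid1.datasciencemodel.oc1..<OCID>",
    model_name="my_embedding_model",
    model_task="embedding",  # overrides the default completion/chat endpoint
    gpu_count=1,
    artifact_location="oci://bucket@namespace/models/embedding/",
)

completion_model = AquaMultiModelRef(
    model_id="ocid1.datasciencemodel.oc1..<OCID>",
    model_name="my_completion_model",  # model_task omitted: default endpoints apply
    gpu_count=1,
    artifact_location="oci://bucket@namespace/models/completion/",
)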
5 changes: 5 additions & 0 deletions ads/aqua/model/enums.py
@@ -28,3 +28,8 @@ class FineTuningCustomMetadata(ExtendedEnum):
class MultiModelSupportedTaskType(ExtendedEnum):
TEXT_GENERATION = "text-generation"
TEXT_GENERATION_ALT = "text_generation"
EMBEDDING_ALT = "text_embedding"
Member: Shouldn't we add embedding as well?

Member Author: Fixed; we add embedding at the SMC level.


class MultiModelConfigMode(ExtendedEnum):
EMBEDDING = "embedding"
DEFAULT = "completion"
16 changes: 15 additions & 1 deletion ads/aqua/model/model.py
@@ -80,7 +80,7 @@
ImportModelDetails,
ModelValidationResult,
)
from ads.aqua.model.enums import MultiModelSupportedTaskType
from ads.aqua.model.enums import MultiModelConfigMode, MultiModelSupportedTaskType
from ads.common.auth import default_signer
from ads.common.oci_resource import SEARCH_TYPE, OCIResource
from ads.common.utils import (
@@ -316,6 +316,11 @@ def create_multi(

display_name_list.append(display_name)

model_task = source_model.freeform_tags.get(Tags.TASK, UNKNOWN)
Member: I would rather move this logic to the _get_task() method:

model.model_task = self._get_task(model, source_model)

def _get_task(model_ref: AquaMultiModelRef, source_model: DataScienceModel) -> str:
    # Extract the task from model_ref itself; if it is not present there,
    # fall back to the source model's freeform tags.
    task = model_ref.model_task or source_model.freeform_tags.get(Tags.TASK, UNKNOWN)
    return task

I believe we should also allow users to pass the task within AquaMultiModelRef, in case the tags were not populated properly.

Member Author: We allow the user to pass the task; if not provided, we use the freeform tags of the source model.


if model_task != UNKNOWN:
self._get_task(model, model_task)

# Retrieve model artifact
model_artifact_path = source_model.artifact
if not model_artifact_path:
@@ -704,6 +709,15 @@ def edit_registered_model(
else:
raise AquaRuntimeError("Only registered unverified models can be edited.")

def _get_task(
Member: Looks like this method doesn't return any value, yet its signature indicates a return type of str. Should we update the type hint to reflect that it returns None, or adjust the implementation to return a string as specified?

self,
model: AquaMultiModelRef,
freeform_task_tag: str
) -> str:
"""In a Multi Model Deployment, will set model task if freeform task tag from model needs a non-completion endpoint (embedding)"""
if freeform_task_tag == MultiModelSupportedTaskType.EMBEDDING_ALT:
model.model_task = MultiModelConfigMode.EMBEDDING

def _fetch_metric_from_metadata(
self,
custom_metadata_list: ModelCustomMetadata,
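Taken together with the review thread, the intended resolution order is: prefer a task passed explicitly on AquaMultiModelRef, otherwise fall back to the source model's freeform TASK tag, and switch the per-model mode to embedding when that tag is text_embedding. A hedged sketch of that resolution logic follows; it is a standalone illustrative helper, not the _get_task method added in this PR (which mutates the model in place and currently returns nothing, as noted above):

from typing import Optional

from ads.aqua.model.enums import MultiModelConfigMode, MultiModelSupportedTaskType

def resolve_model_task(
    user_task: Optional[str], freeform_task_tag: Optional[str]
) -> Optional[str]:
    """Illustrative only: pick the user-supplied task if present; otherwise map an
    embedding-style freeform tag to the embedding config mode."""
    if user_task:
        return user_task  # e.g. "embedding", passed directly on AquaMultiModelRef
    if freeform_task_tag == MultiModelSupportedTaskType.EMBEDDING_ALT:  # "text_embedding"
        return MultiModelConfigMode.EMBEDDING  # "embedding"
    return None  # default completion/chat endpoints apply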
10 changes: 6 additions & 4 deletions ads/aqua/modeldeployment/deployment.py
@@ -178,9 +178,7 @@ def create(
# validate instance shape availability in compartment
available_shapes = [
shape.name.lower()
for shape in self.list_shapes(
compartment_id=compartment_id
)
for shape in self.list_shapes(compartment_id=compartment_id)
]

if create_deployment_details.instance_shape.lower() not in available_shapes:
@@ -645,7 +643,11 @@ def _create_multi(
os_path = ObjectStorageDetails.from_path(artifact_path_prefix)
artifact_path_prefix = os_path.filepath.rstrip("/")

model_config.append({"params": params, "model_path": artifact_path_prefix})
# override the default completion/chat endpoint with another endpoint (e.g., embedding)
config_data = {"params": params, "model_path": artifact_path_prefix}
if model.model_task:
config_data["model_task"] = model.model_task
model_config.append(config_data)
model_name_list.append(model.model_name)

env_var.update({AQUA_MULTI_MODEL_CONFIG: json.dumps({"models": model_config})})
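The net effect of this change is that MULTI_MODEL_CONFIG gains an optional per-model model_task key. A sketch of the resulting environment variable for a mixed deployment (values are illustrative and mirror the unit-test fixtures below):

import json

multi_model_config = {
    "models": [
        {
            "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096",
            "model_path": "models/model_one/5be6479/artifact/",
            "model_task": "embedding",  # only emitted when model.model_task is set
        },
        {
            "params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096",
            "model_path": "models/model_two/83e9aa1/artifact/",
            # no "model_task": the default completion/chat endpoint is used
        },
    ]
}
env_var = {"MULTI_MODEL_CONFIG": json.dumps(multi_model_config)}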
14 changes: 12 additions & 2 deletions tests/unitary/with_extras/aqua/test_deployment.py
@@ -276,7 +276,7 @@ class TestDataset:
"environment_configuration_type": "OCIR_CONTAINER",
"environment_variables": {
"MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions",
"MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}]}',
"MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/", "model_task": "embedding"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}]}',
},
"health_check_port": 8080,
"image": "dsmc://image-name:1.0.0.0",
@@ -486,27 +486,30 @@ class TestDataset:
"gpu_count": 2,
"model_id": "test_model_id_1",
"model_name": "test_model_1",
"model_task": "embedding",
"artifact_location": "test_location_1",
},
{
"env_var": {},
"gpu_count": 2,
"model_id": "test_model_id_2",
"model_name": "test_model_2",
"model_task": None,
"artifact_location": "test_location_2",
},
{
"env_var": {},
"gpu_count": 2,
"model_id": "test_model_id_3",
"model_name": "test_model_3",
"model_task": None,
"artifact_location": "test_location_3",
},
],
"model_id": "ocid1.datasciencemodel.oc1.<region>.<OCID>",
"environment_variables": {
"MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions",
"MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}]}',
"MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/", "model_task": "embedding"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}]}',
},
"cmd": [],
"console_link": "https://cloud.oracle.com/data-science/model-deployments/ocid1.datasciencemodeldeployment.oc1.<region>.<MD_OCID>?region=region-name",
@@ -965,20 +968,23 @@ class TestDataset:
"gpu_count": 1,
"model_id": "ocid1.compartment.oc1..<OCID>",
"model_name": "model_one",
"model_task": "embedding",
"artifact_location": "artifact_location_one",
},
{
"env_var": {"--test_key_two": "test_value_two"},
"gpu_count": 1,
"model_id": "ocid1.compartment.oc1..<OCID>",
"model_name": "model_two",
"model_task": None,
"artifact_location": "artifact_location_two",
},
{
"env_var": {"--test_key_three": "test_value_three"},
"gpu_count": 1,
"model_id": "ocid1.compartment.oc1..<OCID>",
"model_name": "model_three",
"model_task": None,
"artifact_location": "artifact_location_three",
},
]
@@ -1787,20 +1793,23 @@ def test_create_deployment_for_multi_model(
model_info_1 = AquaMultiModelRef(
model_id="test_model_id_1",
model_name="test_model_1",
model_task="embedding",
gpu_count=2,
artifact_location="test_location_1",
)

model_info_2 = AquaMultiModelRef(
model_id="test_model_id_2",
model_name="test_model_2",
model_task=None,
gpu_count=2,
artifact_location="test_location_2",
)

model_info_3 = AquaMultiModelRef(
model_id="test_model_id_3",
model_name="test_model_3",
model_task=None,
gpu_count=2,
artifact_location="test_location_3",
)
@@ -1826,6 +1835,7 @@ def test_create_deployment_for_multi_model(

expected_attributes = set(AquaDeployment.__annotations__.keys())
actual_attributes = result.to_dict()

assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch"
expected_result = copy.deepcopy(TestDataset.aqua_multi_deployment_object)
expected_result["state"] = "CREATING"