class MultiModelSupportedTaskType(ExtendedEnum):
    """Task identifiers accepted for models in a multi-model deployment.

    Values use underscores as word separators. `_extract_model_task` in
    ads/aqua/model/model.py converts hyphens to underscores and lowercases
    the task before testing membership against this type.
    """

    TEXT_GENERATION = "text_generation"
    IMAGE_TEXT_TO_TEXT = "image_text_to_text"
    CODE_SYNTHESIS = "code_synthesis"
    # NOTE: the member is named EMBEDDING, but its value is "text_embedding".
    EMBEDDING = "text_embedding"
def _extract_model_task(
    self,
    model: AquaMultiModelRef,
    source_model: DataScienceModel,
) -> None:
    """Resolve, normalise and validate the task of a model in a multi-model deployment.

    The task supplied by the user on ``model.model_task`` takes precedence.
    When it is not set, the task is read from the ``Tags.TASK`` freeform tag
    of ``source_model``. The resolved task is normalised (hyphens become
    underscores, lower-cased) and must be a member of
    ``MultiModelSupportedTaskType``.

    Parameters
    ----------
    model : AquaMultiModelRef
        The multi-model reference. On success its ``model_task`` attribute is
        set to the normalised task. It is left untouched when validation fails.
    source_model : DataScienceModel
        The model whose freeform tags are used as a fallback source for the task.

    Raises
    ------
    AquaValueError
        If the resolved task is missing or is not a supported task type.
    """
    # Guard against a model without freeform tags.
    # NOTE(review): assumes `freeform_tags` may be None for such models — confirm.
    freeform_tags = source_model.freeform_tags or {}

    # A user-supplied task wins; otherwise fall back to the model's task tag.
    raw_task = model.model_task or freeform_tags.get(Tags.TASK, UNKNOWN)

    # Accept both "text-generation" and "text_generation" spellings.
    task_tag = raw_task.replace("-", "_").lower()

    # Re-visit this check when more model task types are supported.
    if task_tag not in MultiModelSupportedTaskType:
        raise AquaValueError(
            f"Invalid or missing {Tags.TASK} tag for selected model "
            f"{source_model.display_name} (got `{task_tag}`). "
            f"Currently only `{MultiModelSupportedTaskType.values()}` models are supported for multi model deployment."
        )

    # Only write back once the task is known to be valid.
    model.model_task = task_tag
"/v1/completions", - "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}]}', + "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/", "model_task": "text_embedding"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/", "model_task": "image_text_to_text"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/", "model_task": "code_synthesis"}]}', }, "health_check_port": 8080, "image": "dsmc://image-name:1.0.0.0", @@ -486,6 +487,7 @@ class TestDataset: "gpu_count": 2, "model_id": "test_model_id_1", "model_name": "test_model_1", + "model_task": "text_embedding", "artifact_location": "test_location_1", }, { @@ -493,6 +495,7 @@ class TestDataset: "gpu_count": 2, "model_id": "test_model_id_2", "model_name": "test_model_2", + "model_task": "image_text_to_text", "artifact_location": "test_location_2", }, { @@ -500,13 +503,14 @@ class TestDataset: "gpu_count": 2, "model_id": "test_model_id_3", "model_name": "test_model_3", + "model_task": "code_synthesis", "artifact_location": "test_location_3", }, ], "model_id": "ocid1.datasciencemodel.oc1..", "environment_variables": { "MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions", - "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": 
"models/model_one/5be6479/artifact/"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}]}', + "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/", "model_task": "text_embedding"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/", "model_task": "image_text_to_text"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/", "model_task": "code_synthesis"}]}', }, "cmd": [], "console_link": "https://cloud.oracle.com/data-science/model-deployments/ocid1.datasciencemodeldeployment.oc1..?region=region-name", @@ -965,6 +969,7 @@ class TestDataset: "gpu_count": 1, "model_id": "ocid1.compartment.oc1..", "model_name": "model_one", + "model_task": "text_embedding", "artifact_location": "artifact_location_one", }, { @@ -972,6 +977,7 @@ class TestDataset: "gpu_count": 1, "model_id": "ocid1.compartment.oc1..", "model_name": "model_two", + "model_task": "image_text_to_text", "artifact_location": "artifact_location_two", }, { @@ -979,6 +985,7 @@ class TestDataset: "gpu_count": 1, "model_id": "ocid1.compartment.oc1..", "model_name": "model_three", + "model_task": "code_synthesis", "artifact_location": "artifact_location_three", }, ] @@ -1787,6 +1794,7 @@ def test_create_deployment_for_multi_model( model_info_1 = AquaMultiModelRef( model_id="test_model_id_1", model_name="test_model_1", + model_task="text_embedding", gpu_count=2, artifact_location="test_location_1", ) @@ -1794,6 +1802,7 @@ def test_create_deployment_for_multi_model( model_info_2 = 
AquaMultiModelRef( model_id="test_model_id_2", model_name="test_model_2", + model_task="image_text_to_text", gpu_count=2, artifact_location="test_location_2", ) @@ -1801,6 +1810,7 @@ def test_create_deployment_for_multi_model( model_info_3 = AquaMultiModelRef( model_id="test_model_id_3", model_name="test_model_3", + model_task="code_synthesis", gpu_count=2, artifact_location="test_location_3", ) @@ -1826,6 +1836,7 @@ def test_create_deployment_for_multi_model( expected_attributes = set(AquaDeployment.__annotations__.keys()) actual_attributes = result.to_dict() + assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" expected_result = copy.deepcopy(TestDataset.aqua_multi_deployment_object) expected_result["state"] = "CREATING" diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index 0cb14c98f..1587b7592 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -5,6 +5,7 @@ import json import os +import re import shlex import tempfile from dataclasses import asdict @@ -13,9 +14,6 @@ import oci import pytest - -from ads.aqua.app import AquaApp -from ads.aqua.config.container_config import AquaContainerConfig from huggingface_hub.hf_api import HfApi, ModelInfo from parameterized import parameterized @@ -23,7 +21,7 @@ import ads.common import ads.common.oci_client import ads.config - +from ads.aqua.app import AquaApp from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import ModelFormat, Tags from ads.aqua.common.errors import ( @@ -32,6 +30,7 @@ AquaValueError, ) from ads.aqua.common.utils import get_hf_model_info +from ads.aqua.config.container_config import AquaContainerConfig from ads.aqua.constants import HF_METADATA_FOLDER from ads.aqua.model import AquaModelApp from ads.aqua.model.entities import ( @@ -40,6 +39,7 @@ ImportModelDetails, ModelValidationResult, ) +from ads.aqua.model.enums import 
MultiModelSupportedTaskType from ads.common.object_storage_details import ObjectStorageDetails from ads.model.datascience_model import DataScienceModel from ads.model.model_metadata import ( @@ -47,7 +47,6 @@ ModelProvenanceMetadata, ModelTaxonomyMetadata, ) - from tests.unitary.with_extras.aqua.utils import ServiceManagedContainers @@ -397,12 +396,14 @@ def test_create_multimodel( model_info_1 = AquaMultiModelRef( model_id="test_model_id_1", gpu_count=2, + model_task = "text_embedding", env_var={"params": "--trust-remote-code --max-model-len 60000"}, ) model_info_2 = AquaMultiModelRef( model_id="test_model_id_2", gpu_count=2, + model_task = "image_text_to_text", env_var={"params": "--trust-remote-code --max-model-len 32000"}, ) @@ -439,6 +440,29 @@ def test_create_multimodel( mock_model.custom_metadata_list = custom_metadata_list mock_from_id.return_value = mock_model + # testing _extract_model_task when a user passes an invalid task to AquaMultiModelRef + model_info_1.model_task = "invalid_task" + + with pytest.raises(AquaValueError): + model = self.app.create_multi( + models=[model_info_1, model_info_2], + project_id="test_project_id", + compartment_id="test_compartment_id", + ) + + # testing if a user tries to invoke a model with a task mode that is not yet supported + model_info_1.model_task = None + mock_model.freeform_tags["task"] = "unsupported_task" + with pytest.raises(AquaValueError): + model = self.app.create_multi( + models=[model_info_1, model_info_2], + project_id="test_project_id", + compartment_id="test_compartment_id", + ) + + mock_model.freeform_tags["task"] = "text-generation" + model_info_1.model_task = "text_embedding" + # will create a multi-model group model = self.app.create_multi( models=[model_info_1, model_info_2],