Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Vertex AI Model Garden deploy SDK Support for container specifications and equivalent Model Garden models for Hugging Face #5035

Merged
merged 1 commit into from
Mar 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 95 additions & 4 deletions tests/unit/vertexai/model_garden/test_model_garden.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,28 @@

_TEST_ENDPOINT_NAME = "projects/test-project/locations/us-central1/endpoints/1234567890"
_TEST_MODEL_NAME = "projects/test-project/locations/us-central1/models/9876543210"
# Fully-populated container spec shared by the serving-container-spec deploy
# tests below: exercises every field the SDK forwards (image, entrypoint,
# env, HTTP/gRPC ports, routes, timeout, shared memory, and both probes).
# NOTE(review): image tag is pinned to a dated vLLM serving image — presumably
# kept in sync with the SDK's default serving image; confirm when bumping.
_TEST_MODEL_CONTAINER_SPEC = types.ModelContainerSpec(
    image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
    command=["python", "main.py"],
    args=["--model-id=gemma-2b"],
    env=[types.EnvVar(name="MODEL_ID", value="gemma-2b")],
    ports=[types.Port(container_port=7080)],
    grpc_ports=[types.Port(container_port=7081)],
    predict_route="/predictions/v1/predict",
    health_route="/ping",
    # 30-minute deployment timeout, expressed as a protobuf Duration.
    deployment_timeout=duration_pb2.Duration(seconds=1800),
    shared_memory_size_mb=256,
    startup_probe=types.Probe(
        exec_=types.Probe.ExecAction(command=["python", "main.py"]),
        period_seconds=10,
        timeout_seconds=10,
    ),
    health_probe=types.Probe(
        exec_=types.Probe.ExecAction(command=["python", "health_check.py"]),
        period_seconds=10,
        timeout_seconds=10,
    ),
)


@pytest.fixture(scope="module")
Expand All @@ -65,7 +87,7 @@ def deploy_mock():
"deploy",
) as deploy:
mock_lro = mock.Mock(ga_operation.Operation)
mock_lro.result.return_value = types.DeployPublisherModelResponse(
mock_lro.result.return_value = types.DeployResponse(
endpoint=_TEST_ENDPOINT_NAME,
model=_TEST_MODEL_FULL_RESOURCE_NAME,
)
Expand Down Expand Up @@ -588,6 +610,71 @@ def test_deploy_with_serving_container_image_success(self, deploy_mock):
)

def test_deploy_with_serving_container_spec_success(self, deploy_mock):
"""Tests deploying a model with serving container spec."""
aiplatform.init(
project=_TEST_PROJECT,
location=_TEST_LOCATION,
)
model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
model.deploy(serving_container_spec=_TEST_MODEL_CONTAINER_SPEC)
deploy_mock.assert_called_once_with(
types.DeployRequest(
publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
model_config=types.DeployRequest.ModelConfig(
container_spec=_TEST_MODEL_CONTAINER_SPEC
),
)
)

def test_deploy_with_serving_container_spec_no_image_uri_raises_error(self):
"""Tests getting the supported deploy options for a model."""
aiplatform.init(
project=_TEST_PROJECT,
location=_TEST_LOCATION,
)

expected_message = (
"Serving container image uri is required for the serving container" " spec."
)
with pytest.raises(ValueError) as exception:
model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
model.deploy(
serving_container_spec=types.ModelContainerSpec(
predict_route="/predictions/v1/predict",
health_route="/ping",
)
)
assert str(exception.value) == expected_message

def test_deploy_with_serving_container_spec_with_both_image_uri_raises_error(
self,
):
"""Tests getting the supported deploy options for a model."""
aiplatform.init(
project=_TEST_PROJECT,
location=_TEST_LOCATION,
)

expected_message = (
"Serving container image uri is already set in the serving container"
" spec."
)
with pytest.raises(ValueError) as exception:
model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
model.deploy(
serving_container_spec=types.ModelContainerSpec(
image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
predict_route="/predictions/v1/predict",
health_route="/ping",
),
serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
)
assert str(exception.value) == expected_message

def test_deploy_with_serving_container_spec_individual_fields_success(
self, deploy_mock
):
"""Tests deploying a model with serving container spec."""
aiplatform.init(
project=_TEST_PROJECT,
Expand Down Expand Up @@ -665,7 +752,9 @@ def test_list_deploy_options(self, get_publisher_model_mock):
model.list_deploy_options()
get_publisher_model_mock.assert_called_with(
types.GetPublisherModelRequest(
name=_TEST_MODEL_FULL_RESOURCE_NAME, is_hugging_face_model=False
name=_TEST_MODEL_FULL_RESOURCE_NAME,
is_hugging_face_model=False,
include_equivalent_model_garden_model_deployment_configs=True,
)
)

Expand Down Expand Up @@ -697,8 +786,10 @@ def test_list_deployable_models(self, list_publisher_models_mock):
types.ListPublisherModelsRequest(
parent="publishers/*",
list_all_versions=True,
filter="is_hf_wildcard(true) AND "
"labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED",
filter=(
"is_hf_wildcard(true) AND "
"labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED"
),
)
)
assert hf_models == [
Expand Down
22 changes: 22 additions & 0 deletions vertexai/model_garden/_model_garden.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ def deploy(
endpoint_display_name: Optional[str] = None,
model_display_name: Optional[str] = None,
deploy_request_timeout: Optional[float] = None,
serving_container_spec: Optional[types.ModelContainerSpec] = None,
serving_container_image_uri: Optional[str] = None,
serving_container_predict_route: Optional[str] = None,
serving_container_health_route: Optional[str] = None,
Expand Down Expand Up @@ -400,6 +401,10 @@ def deploy(
model_display_name: The display name of the uploaded model.
deploy_request_timeout: The timeout for the deploy request. Default
is 2 hours.
serving_container_spec (types.ModelContainerSpec):
Optional. The container specification for the model instance.
This specification overrides the default container specification
and other serving container parameters.
serving_container_image_uri (str):
Optional. The URI of the Model serving container. This parameter is required
if the parameter `local_model` is not specified.
Expand Down Expand Up @@ -474,6 +479,11 @@ def deploy(
Returns:
endpoint (aiplatform.Endpoint):
Created endpoint.

Raises:
ValueError: If ``serving_container_spec`` is specified but ``serving_container_spec.image_uri``
is ``None``, or if ``serving_container_spec`` is specified but other
serving container parameters are specified.
"""
request = types.DeployRequest(
destination=f"projects/{self._project}/locations/{self._location}",
Expand Down Expand Up @@ -529,6 +539,17 @@ def deploy(
if fast_tryout_enabled:
request.deploy_config.fast_tryout_enabled = fast_tryout_enabled

if serving_container_spec:
if not serving_container_spec.image_uri:
raise ValueError(
"Serving container image uri is required for the serving container spec."
)
if serving_container_image_uri:
raise ValueError(
"Serving container image uri is already set in the serving container spec."
)
request.model_config.container_spec = serving_container_spec

if serving_container_image_uri:
request.model_config.container_spec = _construct_serving_container_spec(
serving_container_image_uri,
Expand Down Expand Up @@ -574,6 +595,7 @@ def list_deploy_options(
request = types.GetPublisherModelRequest(
name=self._publisher_model_name,
is_hugging_face_model="@" not in self._publisher_model_name,
include_equivalent_model_garden_model_deployment_configs=True,
)
response = self._us_central1_model_garden_client.get_publisher_model(request)
multi_deploy = (
Expand Down