1515
def get_default_probe_settings() -> ProbeSettings:
    """Get default probe settings for deployments.

    Probes are APIs exposed by the deployment that tell the framework
    whether the deployment is healthy and ready to receive traffic.

    Returns:
        A ``ProbeSettings`` built from fixed defaults for the initial delay,
        polling period, timeout, success threshold and failure threshold.
    """
    # NOTE(review): the time-valued fields are presumably in seconds (per the
    # azure-ai-ml ProbeSettings reference) -- confirm before tuning them.
    return ProbeSettings(
        initial_delay=1400,
        period=30,
        timeout=2,
        success_threshold=1,
        failure_threshold=30,
    )
2525
2626
def get_default_request_settings() -> OnlineRequestSettings:
    """Get default request settings for deployments.

    The online request settings control the request timeout and the number
    of concurrent requests allowed per instance.

    Returns:
        An ``OnlineRequestSettings`` with a 90000 ms request timeout and a
        limit of 4 concurrent requests per instance.
    """
    return OnlineRequestSettings(
        request_timeout_ms=90000,
        max_concurrent_requests_per_instance=4,
    )
3333
3434
3535def create_managed_deployment (
3636 ml_client : MLClient ,
37- model_asset_id : str , # Asset ID of the model to deploy
38- instance_type : str , # Supported instance type for managed deployment
39- environment_asset_id : Optional [str ] = None , # Asset ID of the serving engine to use
37+ model_asset_id : str , # Asset ID of the model to deploy
38+ instance_type : str , # Supported instance type for managed deployment
39+ environment_asset_id : Optional [str ] = None , # Asset ID of the serving engine to use
4040 endpoint_name : Optional [str ] = None ,
4141 endpoint_description : str = "Sample endpoint" ,
4242 endpoint_tags : dict = {},
4343 deployment_name : Optional [str ] = None ,
4444 deployment_env_vars : dict = {},
4545) -> str :
4646 """Create a managed deployment."""
47- guid = str (uuid .uuid4 ())[:8 ] # Unique suffix to avoid name collisions
47+ guid = str (uuid .uuid4 ())[:8 ] # Unique suffix to avoid name collisions
4848 endpoint_name = endpoint_name or f"rl-endpoint"
49- endpoint_name = f"{ endpoint_name } -{ guid } " # Unique names prevent collisions and allow parallel experiments
49+ endpoint_name = f"{ endpoint_name } -{ guid } " # Unique names prevent collisions and allow parallel experiments
5050 deployment_name = deployment_name or "default"
5151
52- endpoint = ManagedOnlineEndpoint ( # Use AzureML endpoint abstraction for traffic management and auth
52+ endpoint = ManagedOnlineEndpoint ( # Use AzureML endpoint abstraction for traffic management and auth
5353 name = endpoint_name ,
5454 auth_mode = "key" ,
5555 description = endpoint_description ,
5656 tags = endpoint_tags ,
5757 )
5858
5959 print (f"Creating endpoint: { endpoint_name } " )
60- ml_client .online_endpoints .begin_create_or_update (endpoint ).wait () # Using there the endpoint object to trigger actual endpoint in AML workspace.
60+ ml_client .online_endpoints .begin_create_or_update (
61+ endpoint
62+ ).wait () # Submit the endpoint object to create the actual endpoint in the AML workspace.
6163
62- deployment = ManagedOnlineDeployment ( # Use deployment abstraction for scaling, versioning, and isolation
64+ deployment = ManagedOnlineDeployment ( # Use deployment abstraction for scaling, versioning, and isolation
6365 name = deployment_name ,
6466 endpoint_name = endpoint_name ,
6567 model = model_asset_id ,
@@ -72,8 +74,8 @@ def create_managed_deployment(
7274 request_settings = get_default_request_settings (),
7375 )
7476
75- print (f"Creating deployment (15-20 min)..." ) #
76- ml_client .online_deployments .begin_create_or_update (deployment ).wait ()
77+ print (f"Creating deployment (15-20 min)..." ) #
78+ ml_client .online_deployments .begin_create_or_update (deployment ).wait ()
7779
7880 # Route all traffic to new deployment for immediate use
7981 endpoint .traffic = {deployment_name : 100 }
@@ -86,10 +88,10 @@ def create_managed_deployment(
8688
8789def create_kubernetes_deployment (
8890 ml_client : MLClient ,
89- model_asset_id : str , # Asset ID of the model to deploy
90- environment_asset_id : str , # Asset ID of the serving engine to use
91- instance_type : str , # Kubernetes supports partial node usage granular upto the GPU level
92- compute_name : str , # Name of the compute which will be use for endpoint creation
91+ model_asset_id : str , # Asset ID of the model to deploy
92+ environment_asset_id : str , # Asset ID of the serving engine to use
93+ instance_type : str , # Kubernetes supports partial node usage, granular up to the GPU level
94+ compute_name : str , # Name of the compute that will be used for endpoint creation
9395 endpoint_name : Optional [str ] = None ,
9496 endpoint_description : str = "Sample endpoint" ,
9597 endpoint_tags : dict = {},
@@ -98,15 +100,15 @@ def create_kubernetes_deployment(
98100 model_mount_path : str = "/var/model-mount" ,
99101) -> str :
100102 """Create endpoint using Kubernetes."""
101-
103+
102104 print ("🌐 Creating endpoint..." )
103105
104- guid = str (uuid .uuid4 ())[:8 ] # Unique suffix to avoid name collisions
106+ guid = str (uuid .uuid4 ())[:8 ] # Unique suffix to avoid name collisions
105107 endpoint_name = endpoint_name or f"rl-endpoint"
106- endpoint_name = f"{ endpoint_name } -{ guid } " # Unique names prevent collisions and allow parallel experiments
108+ endpoint_name = f"{ endpoint_name } -{ guid } " # Unique names prevent collisions and allow parallel experiments
107109 deployment_name = deployment_name or "default"
108110
109- endpoint = KubernetesOnlineEndpoint ( # Use AzureML endpoint abstraction for traffic management and auth
111+ endpoint = KubernetesOnlineEndpoint ( # Use AzureML endpoint abstraction for traffic management and auth
110112 name = endpoint_name ,
111113 auth_mode = "key" ,
112114 compute = compute_name ,
@@ -115,9 +117,11 @@ def create_kubernetes_deployment(
115117 )
116118
117119 print (f"Creating endpoint: { endpoint_name } " )
118- ml_client .online_endpoints .begin_create_or_update (endpoint ).wait () # Using there the endpoint object to trigger actual endpoint in AML workspace.
120+ ml_client .online_endpoints .begin_create_or_update (
121+ endpoint
122+ ).wait () # Submit the endpoint object to create the actual endpoint in the AML workspace.
119123
120- deployment = KubernetesOnlineDeployment ( # Use deployment abstraction for scaling, versioning, and isolation
124+ deployment = KubernetesOnlineDeployment ( # Use deployment abstraction for scaling, versioning, and isolation
121125 name = deployment_name ,
122126 endpoint_name = endpoint_name ,
123127 model = model_asset_id ,
@@ -131,8 +135,8 @@ def create_kubernetes_deployment(
131135 request_settings = get_default_request_settings (),
132136 )
133137
134- print (f"Creating deployment (15-20 min)..." ) #
135- ml_client .online_deployments .begin_create_or_update (deployment ).wait ()
138+ print (f"Creating deployment (15-20 min)..." ) #
139+ ml_client .online_deployments .begin_create_or_update (deployment ).wait ()
136140
137141 # Route all traffic to new deployment for immediate use
138142 endpoint .traffic = {deployment_name : 100 }
@@ -165,7 +169,7 @@ def test_deployment(ml_client, endpoint_name):
165169Context: A company has revenue of $1,000,000 and expenses of $750,000.
166170
167171Question: What is the profit margin as a percentage?
168- Let's think step by step and put final answer after ####."""
172+ Let's think step by step and put final answer after ####.""" ,
169173 }
170174 ],
171175 "max_tokens" : 512 ,
0 commit comments