Skip to content

Commit 3977d8d

Browse files
vertex-mg-bot and copybara-github
authored and committed
Add option for enable_llama_tool_parser
PiperOrigin-RevId: 741206007
1 parent 0fda02c commit 3977d8d

20 files changed

+105
-0
lines changed

notebooks/community/model_garden/model_garden_axolotl_finetuning.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,7 @@
950950
" use_dedicated_endpoint: bool = False,\n",
951951
" max_num_seqs: int = 256,\n",
952952
" model_type: str = None,\n",
953+
" enable_llama_tool_parser: bool = False,\n",
953954
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
954955
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
955956
" endpoint = aiplatform.Endpoint.create(\n",
@@ -1002,6 +1003,10 @@
10021003
" if model_type:\n",
10031004
" vllm_args.append(f\"--model-type={model_type}\")\n",
10041005
"\n",
1006+
" if enable_llama_tool_parser:\n",
1007+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
1008+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
1009+
"\n",
10051010
" env_vars = {\n",
10061011
" \"MODEL_ID\": base_model_id,\n",
10071012
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_codegemma_deployment_on_vertex.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,7 @@
592592
" use_dedicated_endpoint: bool = False,\n",
593593
" max_num_seqs: int = 256,\n",
594594
" model_type: str = None,\n",
595+
" enable_llama_tool_parser: bool = False,\n",
595596
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
596597
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
597598
" endpoint = aiplatform.Endpoint.create(\n",
@@ -644,6 +645,10 @@
644645
" if model_type:\n",
645646
" vllm_args.append(f\"--model-type={model_type}\")\n",
646647
"\n",
648+
" if enable_llama_tool_parser:\n",
649+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
650+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
651+
"\n",
647652
" env_vars = {\n",
648653
" \"MODEL_ID\": base_model_id,\n",
649654
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_gemma3_deployment_on_vertex.ipynb

+10
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@
263263
" use_dedicated_endpoint: bool = False,\n",
264264
" max_num_seqs: int = 256,\n",
265265
" model_type: str = None,\n",
266+
" enable_llama_tool_parser: bool = False,\n",
266267
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
267268
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
268269
" endpoint = aiplatform.Endpoint.create(\n",
@@ -315,6 +316,10 @@
315316
" if model_type:\n",
316317
" vllm_args.append(f\"--model-type={model_type}\")\n",
317318
"\n",
319+
" if enable_llama_tool_parser:\n",
320+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
321+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
322+
"\n",
318323
" env_vars = {\n",
319324
" \"MODEL_ID\": base_model_id,\n",
320325
" \"DEPLOY_SOURCE\": \"notebook\",\n",
@@ -635,6 +640,7 @@
635640
" use_dedicated_endpoint: bool = False,\n",
636641
" max_num_seqs: int = 256,\n",
637642
" model_type: str = None,\n",
643+
" enable_llama_tool_parser: bool = False,\n",
638644
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
639645
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
640646
" endpoint = aiplatform.Endpoint.create(\n",
@@ -687,6 +693,10 @@
687693
" if model_type:\n",
688694
" vllm_args.append(f\"--model-type={model_type}\")\n",
689695
"\n",
696+
" if enable_llama_tool_parser:\n",
697+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
698+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
699+
"\n",
690700
" env_vars = {\n",
691701
" \"MODEL_ID\": base_model_id,\n",
692702
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_gemma_deployment_on_vertex.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,7 @@
702702
" use_dedicated_endpoint: bool = False,\n",
703703
" max_num_seqs: int = 256,\n",
704704
" model_type: str = None,\n",
705+
" enable_llama_tool_parser: bool = False,\n",
705706
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
706707
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
707708
" endpoint = aiplatform.Endpoint.create(\n",
@@ -754,6 +755,10 @@
754755
" if model_type:\n",
755756
" vllm_args.append(f\"--model-type={model_type}\")\n",
756757
"\n",
758+
" if enable_llama_tool_parser:\n",
759+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
760+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
761+
"\n",
757762
" env_vars = {\n",
758763
" \"MODEL_ID\": base_model_id,\n",
759764
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_gradio_streaming_chat_completions.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@
208208
" use_dedicated_endpoint: bool = False,\n",
209209
" max_num_seqs: int = 256,\n",
210210
" model_type: str = None,\n",
211+
" enable_llama_tool_parser: bool = False,\n",
211212
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
212213
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
213214
" endpoint = aiplatform.Endpoint.create(\n",
@@ -260,6 +261,10 @@
260261
" if model_type:\n",
261262
" vllm_args.append(f\"--model-type={model_type}\")\n",
262263
"\n",
264+
" if enable_llama_tool_parser:\n",
265+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
266+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
267+
"\n",
263268
" env_vars = {\n",
264269
" \"MODEL_ID\": base_model_id,\n",
265270
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_llama_guard_deployment.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@
319319
" use_dedicated_endpoint: bool = False,\n",
320320
" max_num_seqs: int = 256,\n",
321321
" model_type: str = None,\n",
322+
" enable_llama_tool_parser: bool = False,\n",
322323
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
323324
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
324325
" endpoint = aiplatform.Endpoint.create(\n",
@@ -371,6 +372,10 @@
371372
" if model_type:\n",
372373
" vllm_args.append(f\"--model-type={model_type}\")\n",
373374
"\n",
375+
" if enable_llama_tool_parser:\n",
376+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
377+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
378+
"\n",
374379
" env_vars = {\n",
375380
" \"MODEL_ID\": base_model_id,\n",
376381
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_phi3_deployment.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,7 @@
384384
" use_dedicated_endpoint: bool = False,\n",
385385
" max_num_seqs: int = 256,\n",
386386
" model_type: str = None,\n",
387+
" enable_llama_tool_parser: bool = False,\n",
387388
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
388389
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
389390
" endpoint = aiplatform.Endpoint.create(\n",
@@ -436,6 +437,10 @@
436437
" if model_type:\n",
437438
" vllm_args.append(f\"--model-type={model_type}\")\n",
438439
"\n",
440+
" if enable_llama_tool_parser:\n",
441+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
442+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
443+
"\n",
439444
" env_vars = {\n",
440445
" \"MODEL_ID\": base_model_id,\n",
441446
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_phi4_deployment.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@
285285
" use_dedicated_endpoint: bool = False,\n",
286286
" max_num_seqs: int = 256,\n",
287287
" model_type: str = None,\n",
288+
" enable_llama_tool_parser: bool = False,\n",
288289
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
289290
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
290291
" endpoint = aiplatform.Endpoint.create(\n",
@@ -337,6 +338,10 @@
337338
" if model_type:\n",
338339
" vllm_args.append(f\"--model-type={model_type}\")\n",
339340
"\n",
341+
" if enable_llama_tool_parser:\n",
342+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
343+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
344+
"\n",
340345
" env_vars = {\n",
341346
" \"MODEL_ID\": base_model_id,\n",
342347
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_biogpt_serve.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@
172172
" use_dedicated_endpoint: bool = False,\n",
173173
" max_num_seqs: int = 256,\n",
174174
" model_type: str = None,\n",
175+
" enable_llama_tool_parser: bool = False,\n",
175176
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
176177
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
177178
" endpoint = aiplatform.Endpoint.create(\n",
@@ -224,6 +225,10 @@
224225
" if model_type:\n",
225226
" vllm_args.append(f\"--model-type={model_type}\")\n",
226227
"\n",
228+
" if enable_llama_tool_parser:\n",
229+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
230+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
231+
"\n",
227232
" env_vars = {\n",
228233
" \"MODEL_ID\": base_model_id,\n",
229234
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_codellama.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@
271271
" use_dedicated_endpoint: bool = False,\n",
272272
" max_num_seqs: int = 256,\n",
273273
" model_type: str = None,\n",
274+
" enable_llama_tool_parser: bool = False,\n",
274275
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
275276
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
276277
" endpoint = aiplatform.Endpoint.create(\n",
@@ -323,6 +324,10 @@
323324
" if model_type:\n",
324325
" vllm_args.append(f\"--model-type={model_type}\")\n",
325326
"\n",
327+
" if enable_llama_tool_parser:\n",
328+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
329+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
330+
"\n",
326331
" env_vars = {\n",
327332
" \"MODEL_ID\": base_model_id,\n",
328333
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_llama3_1_agent_engine.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,7 @@
487487
" use_dedicated_endpoint: bool = False,\n",
488488
" max_num_seqs: int = 256,\n",
489489
" model_type: str = None,\n",
490+
" enable_llama_tool_parser: bool = False,\n",
490491
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
491492
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
492493
" endpoint = aiplatform.Endpoint.create(\n",
@@ -539,6 +540,10 @@
539540
" if model_type:\n",
540541
" vllm_args.append(f\"--model-type={model_type}\")\n",
541542
"\n",
543+
" if enable_llama_tool_parser:\n",
544+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
545+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
546+
"\n",
542547
" env_vars = {\n",
543548
" \"MODEL_ID\": base_model_id,\n",
544549
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_llama3_1_deployment.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -964,6 +964,7 @@
964964
" use_dedicated_endpoint: bool = False,\n",
965965
" max_num_seqs: int = 256,\n",
966966
" model_type: str = None,\n",
967+
" enable_llama_tool_parser: bool = False,\n",
967968
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
968969
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
969970
" endpoint = aiplatform.Endpoint.create(\n",
@@ -1016,6 +1017,10 @@
10161017
" if model_type:\n",
10171018
" vllm_args.append(f\"--model-type={model_type}\")\n",
10181019
"\n",
1020+
" if enable_llama_tool_parser:\n",
1021+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
1022+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
1023+
"\n",
10191024
" env_vars = {\n",
10201025
" \"MODEL_ID\": base_model_id,\n",
10211026
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_llama3_1_reasoning_engine.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,7 @@
487487
" use_dedicated_endpoint: bool = False,\n",
488488
" max_num_seqs: int = 256,\n",
489489
" model_type: str = None,\n",
490+
" enable_llama_tool_parser: bool = False,\n",
490491
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
491492
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
492493
" endpoint = aiplatform.Endpoint.create(\n",
@@ -539,6 +540,10 @@
539540
" if model_type:\n",
540541
" vllm_args.append(f\"--model-type={model_type}\")\n",
541542
"\n",
543+
" if enable_llama_tool_parser:\n",
544+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
545+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
546+
"\n",
542547
" env_vars = {\n",
543548
" \"MODEL_ID\": base_model_id,\n",
544549
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_llama3_2_deployment.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,7 @@
935935
" use_dedicated_endpoint: bool = False,\n",
936936
" max_num_seqs: int = 256,\n",
937937
" model_type: str = None,\n",
938+
" enable_llama_tool_parser: bool = False,\n",
938939
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
939940
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
940941
" endpoint = aiplatform.Endpoint.create(\n",
@@ -987,6 +988,10 @@
987988
" if model_type:\n",
988989
" vllm_args.append(f\"--model-type={model_type}\")\n",
989990
"\n",
991+
" if enable_llama_tool_parser:\n",
992+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
993+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
994+
"\n",
990995
" env_vars = {\n",
991996
" \"MODEL_ID\": base_model_id,\n",
992997
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_llama3_deployment.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@
340340
" use_dedicated_endpoint: bool = False,\n",
341341
" max_num_seqs: int = 256,\n",
342342
" model_type: str = None,\n",
343+
" enable_llama_tool_parser: bool = False,\n",
343344
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
344345
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
345346
" endpoint = aiplatform.Endpoint.create(\n",
@@ -392,6 +393,10 @@
392393
" if model_type:\n",
393394
" vllm_args.append(f\"--model-type={model_type}\")\n",
394395
"\n",
396+
" if enable_llama_tool_parser:\n",
397+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
398+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
399+
"\n",
395400
" env_vars = {\n",
396401
" \"MODEL_ID\": base_model_id,\n",
397402
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_llama3_finetuning.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,7 @@
615615
" use_dedicated_endpoint: bool = False,\n",
616616
" max_num_seqs: int = 256,\n",
617617
" model_type: str = None,\n",
618+
" enable_llama_tool_parser: bool = False,\n",
618619
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
619620
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
620621
" endpoint = aiplatform.Endpoint.create(\n",
@@ -667,6 +668,10 @@
667668
" if model_type:\n",
668669
" vllm_args.append(f\"--model-type={model_type}\")\n",
669670
"\n",
671+
" if enable_llama_tool_parser:\n",
672+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
673+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
674+
"\n",
670675
" env_vars = {\n",
671676
" \"MODEL_ID\": base_model_id,\n",
672677
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_llava.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@
264264
" use_dedicated_endpoint: bool = False,\n",
265265
" max_num_seqs: int = 256,\n",
266266
" model_type: str = None,\n",
267+
" enable_llama_tool_parser: bool = False,\n",
267268
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
268269
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
269270
" endpoint = aiplatform.Endpoint.create(\n",
@@ -316,6 +317,10 @@
316317
" if model_type:\n",
317318
" vllm_args.append(f\"--model-type={model_type}\")\n",
318319
"\n",
320+
" if enable_llama_tool_parser:\n",
321+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
322+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
323+
"\n",
319324
" env_vars = {\n",
320325
" \"MODEL_ID\": base_model_id,\n",
321326
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_mistral_deployment.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@
282282
" use_dedicated_endpoint: bool = False,\n",
283283
" max_num_seqs: int = 256,\n",
284284
" model_type: str = None,\n",
285+
" enable_llama_tool_parser: bool = False,\n",
285286
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
286287
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
287288
" endpoint = aiplatform.Endpoint.create(\n",
@@ -334,6 +335,10 @@
334335
" if model_type:\n",
335336
" vllm_args.append(f\"--model-type={model_type}\")\n",
336337
"\n",
338+
" if enable_llama_tool_parser:\n",
339+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
340+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
341+
"\n",
337342
" env_vars = {\n",
338343
" \"MODEL_ID\": base_model_id,\n",
339344
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_mixtral_deployment.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@
295295
" use_dedicated_endpoint: bool = False,\n",
296296
" max_num_seqs: int = 256,\n",
297297
" model_type: str = None,\n",
298+
" enable_llama_tool_parser: bool = False,\n",
298299
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
299300
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
300301
" endpoint = aiplatform.Endpoint.create(\n",
@@ -347,6 +348,10 @@
347348
" if model_type:\n",
348349
" vllm_args.append(f\"--model-type={model_type}\")\n",
349350
"\n",
351+
" if enable_llama_tool_parser:\n",
352+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
353+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
354+
"\n",
350355
" env_vars = {\n",
351356
" \"MODEL_ID\": base_model_id,\n",
352357
" \"DEPLOY_SOURCE\": \"notebook\",\n",

notebooks/community/model_garden/model_garden_pytorch_qwen2_deployment.ipynb

+5
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@
332332
" use_dedicated_endpoint: bool = False,\n",
333333
" max_num_seqs: int = 256,\n",
334334
" model_type: str = None,\n",
335+
" enable_llama_tool_parser: bool = False,\n",
335336
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
336337
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
337338
" endpoint = aiplatform.Endpoint.create(\n",
@@ -384,6 +385,10 @@
384385
" if model_type:\n",
385386
" vllm_args.append(f\"--model-type={model_type}\")\n",
386387
"\n",
388+
" if enable_llama_tool_parser:\n",
389+
" vllm_args.append(\"--enable-auto-tool-choice\")\n",
390+
" vllm_args.append(\"--tool-call-parser=vertex-llama-3\")\n",
391+
"\n",
387392
" env_vars = {\n",
388393
" \"MODEL_ID\": base_model_id,\n",
389394
" \"DEPLOY_SOURCE\": \"notebook\",\n",

0 commit comments

Comments
 (0)