Skip to content

Commit 91a18e7

Browse files
authored
feat: add support for structured output with zod schema. (#1749)
1 parent d1c1f99 commit 91a18e7

File tree

20 files changed

+148
-17
lines changed

20 files changed

+148
-17
lines changed

.changeset/mighty-eagles-wink.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
"@llamaindex/huggingface": minor
3+
"@llamaindex/anthropic": minor
4+
"@llamaindex/mistral": minor
5+
"@llamaindex/google": minor
6+
"@llamaindex/ollama": minor
7+
"@llamaindex/openai": minor
8+
"@llamaindex/core": minor
9+
"@llamaindex/examples": minor
10+
---
11+
12+
Added support for structured output in the chat API of OpenAI and Ollama
13+
Added a structured output parameter to the providers

e2e/fixtures/llm/openai.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ export class OpenAI implements LLM {
4242
contextWindow: 2048,
4343
tokenizer: undefined,
4444
isFunctionCallingModel: true,
45+
structuredOutput: false,
4546
};
4647
}
4748

examples/jsonExtract.ts

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,53 @@
11
import { OpenAI } from "@llamaindex/openai";
2+
import { z } from "zod";
23

34
// Example using OpenAI's chat API to extract JSON from a sales call transcript
45
// using json_mode see https://platform.openai.com/docs/guides/text-generation/json-mode for more details
56

67
const transcript =
78
"[Phone rings]\n\nJohn: Hello, this is John.\n\nSarah: Hi John, this is Sarah from XYZ Company. I'm calling to discuss our new product, the XYZ Widget, and see if it might be a good fit for your business.\n\nJohn: Hi Sarah, thanks for reaching out. I'm definitely interested in learning more about the XYZ Widget. Can you give me a quick overview of what it does?\n\nSarah: Of course! The XYZ Widget is a cutting-edge tool that helps businesses streamline their workflow and improve productivity. It's designed to automate repetitive tasks and provide real-time data analytics to help you make informed decisions.\n\nJohn: That sounds really interesting. I can see how that could benefit our team. Do you have any case studies or success stories from other companies who have used the XYZ Widget?\n\nSarah: Absolutely, we have several case studies that I can share with you. I'll send those over along with some additional information about the product. I'd also love to schedule a demo for you and your team to see the XYZ Widget in action.\n\nJohn: That would be great. I'll make sure to review the case studies and then we can set up a time for the demo. In the meantime, are there any specific action items or next steps we should take?\n\nSarah: Yes, I'll send over the information and then follow up with you to schedule the demo. In the meantime, feel free to reach out if you have any questions or need further information.\n\nJohn: Sounds good, I appreciate your help Sarah. I'm looking forward to learning more about the XYZ Widget and seeing how it can benefit our business.\n\nSarah: Thank you, John. I'll be in touch soon. Have a great day!\n\nJohn: You too, bye.";
89

10+
const exampleSchema = z.object({
11+
summary: z.string(),
12+
products: z.array(z.string()),
13+
rep_name: z.string(),
14+
prospect_name: z.string(),
15+
action_items: z.array(z.string()),
16+
});
17+
18+
const example = {
19+
summary:
20+
"High-level summary of the call transcript. Should not exceed 3 sentences.",
21+
products: ["product 1", "product 2"],
22+
rep_name: "Name of the sales rep",
23+
prospect_name: "Name of the prospect",
24+
action_items: ["action item 1", "action item 2"],
25+
};
26+
927
async function main() {
1028
const llm = new OpenAI({
11-
model: "gpt-4-1106-preview",
12-
additionalChatOptions: { response_format: { type: "json_object" } },
29+
model: "gpt-4o",
1330
});
1431

15-
const example = {
16-
summary:
17-
"High-level summary of the call transcript. Should not exceed 3 sentences.",
18-
products: ["product 1", "product 2"],
19-
rep_name: "Name of the sales rep",
20-
prospect_name: "Name of the prospect",
21-
action_items: ["action item 1", "action item 2"],
22-
};
23-
32+
// response format as a Zod schema
2433
const response = await llm.chat({
34+
messages: [
35+
{
36+
role: "system",
37+
content: `You are an expert assistant for summarizing and extracting insights from sales call transcripts.`,
38+
},
39+
{
40+
role: "user",
41+
content: `Here is the transcript: \n------\n${transcript}\n------`,
42+
},
43+
],
44+
responseFormat: exampleSchema,
45+
});
46+
47+
console.log(response.message.content);
48+
49+
// response format as json_object
50+
const response2 = await llm.chat({
2551
messages: [
2652
{
2753
role: "system",
@@ -34,9 +60,10 @@ async function main() {
3460
content: `Here is the transcript: \n------\n${transcript}\n------`,
3561
},
3662
],
63+
responseFormat: { type: "json_object" },
3764
});
3865

39-
console.log(response.message.content);
66+
console.log(response2.message.content);
4067
}
4168

4269
main().catch(console.error);

packages/community/src/llm/bedrock/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,7 @@ export class Bedrock extends ToolCallLLM<BedrockAdditionalChatOptions> {
381381
maxTokens: this.maxTokens,
382382
contextWindow: BEDROCK_FOUNDATION_LLMS[this.model] ?? 128000,
383383
tokenizer: undefined,
384+
structuredOutput: false,
384385
};
385386
}
386387

packages/core/src/llms/base.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@ export abstract class BaseLLM<
2828
async complete(
2929
params: LLMCompletionParamsStreaming | LLMCompletionParamsNonStreaming,
3030
): Promise<CompletionResponse | AsyncIterable<CompletionResponse>> {
31-
const { prompt, stream } = params;
31+
const { prompt, stream, responseFormat } = params;
3232
if (stream) {
3333
const stream = await this.chat({
3434
messages: [{ content: prompt, role: "user" }],
3535
stream: true,
36+
...(responseFormat ? { responseFormat } : {}),
3637
});
3738
return streamConverter(stream, (chunk) => {
3839
return {
@@ -41,9 +42,12 @@ export abstract class BaseLLM<
4142
};
4243
});
4344
}
45+
4446
const chatResponse = await this.chat({
4547
messages: [{ content: prompt, role: "user" }],
48+
...(responseFormat ? { responseFormat } : {}),
4649
});
50+
4751
return {
4852
text: extractText(chatResponse.message.content),
4953
raw: chatResponse.raw,

packages/core/src/llms/type.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import type { Tokenizers } from "@llamaindex/env/tokenizers";
22
import type { JSONSchemaType } from "ajv";
3+
import { z } from "zod";
34
import type { JSONObject, JSONValue } from "../global";
45

56
/**
@@ -106,6 +107,7 @@ export type LLMMetadata = {
106107
maxTokens?: number | undefined;
107108
contextWindow: number;
108109
tokenizer: Tokenizers | undefined;
110+
structuredOutput: boolean;
109111
};
110112

111113
export interface LLMChatParamsBase<
@@ -115,6 +117,7 @@ export interface LLMChatParamsBase<
115117
messages: ChatMessage<AdditionalMessageOptions>[];
116118
additionalChatOptions?: AdditionalChatOptions;
117119
tools?: BaseTool[];
120+
responseFormat?: z.ZodType | object;
118121
}
119122

120123
export interface LLMChatParamsStreaming<
@@ -133,6 +136,7 @@ export interface LLMChatParamsNonStreaming<
133136

134137
export interface LLMCompletionParamsBase {
135138
prompt: MessageContent;
139+
responseFormat?: z.ZodType | object;
136140
}
137141

138142
export interface LLMCompletionParamsStreaming extends LLMCompletionParamsBase {

packages/core/src/utils/mock.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ export class MockLLM extends ToolCallLLM {
3535
topP: 0.5,
3636
contextWindow: 1024,
3737
tokenizer: undefined,
38+
structuredOutput: false,
3839
};
3940
}
4041

packages/providers/anthropic/src/llm.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ export class Anthropic extends ToolCallLLM<
191191
].contextWindow
192192
: 200000,
193193
tokenizer: undefined,
194+
structuredOutput: false,
194195
};
195196
}
196197

packages/providers/google/src/base.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ export class Gemini extends ToolCallLLM<GeminiAdditionalChatOptions> {
241241
maxTokens: this.maxTokens,
242242
contextWindow: GEMINI_MODEL_INFO_MAP[this.model].contextWindow,
243243
tokenizer: undefined,
244+
structuredOutput: false,
244245
};
245246
}
246247

packages/providers/huggingface/src/llm.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ export class HuggingFaceLLM extends BaseLLM {
5757
maxTokens: this.maxTokens,
5858
contextWindow: this.contextWindow,
5959
tokenizer: undefined,
60+
structuredOutput: false,
6061
};
6162
}
6263

0 commit comments

Comments
 (0)