Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenAI PDF support #30404

Closed
5 tasks done
austinmw opened this issue Mar 20, 2025 · 2 comments
Closed
5 tasks done

OpenAI PDF support #30404

austinmw opened this issue Mar 20, 2025 · 2 comments

Comments

@austinmw
Copy link

Checked other resources

  • I added a very descriptive title to this issue.
  • I searched the LangChain documentation with the integrated search.
  • I used the GitHub search to find a similar question and didn't find it.
  • I am sure that this is a bug in LangChain rather than my code.
  • The bug is not resolved by updating to the latest stable version of LangChain (or the specific integration package).

Example Code

This works for Claude models, but not OpenAI models.

from base64 import b64encode
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
import requests


url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
data = b64encode(requests.get(url).content).decode()
llm = ChatOpenAI(model="gpt-4o")
ai_msg = llm.invoke(
    [
        HumanMessage(
            [
                "Summarize this document.",
                {
                    "type": "document",
                    "source": {
                        "type": "base64",
                        "data": data,
                        "media_type": "application/pdf",
                    },
                },
            ]
        )
    ]
)
ai_msg.content

https://platform.openai.com/docs/guides/pdf-files?api-mode=chat&utm_source=alphasignal

Error Message and Stack Trace (if applicable)


BadRequestError Traceback (most recent call last)
Cell In[1], line 10
8 data = b64encode(requests.get(url).content).decode()
9 llm = ChatOpenAI(model="gpt-4o")
---> 10 ai_msg = llm.invoke(
11 [
12 HumanMessage(
13 [
14 "Summarize this document.",
15 {
16 "type": "document",
17 "source": {
18 "type": "base64",
19 "data": data,
20 "media_type": "application/pdf",
21 },
22 },
23 ]
24 )
25 ]
26 )
27 ai_msg.content

File ~/mambaforge/envs/elevate/lib/python3.13/site-packages/langchain_core/language_models/chat_models.py:307, in BaseChatModel.invoke(self, input, config, stop, **kwargs)
296 def invoke(
297 self,
298 input: LanguageModelInput,
(...)
302 **kwargs: Any,
303 ) -> BaseMessage:
304 config = ensure_config(config)
305 return cast(
306 ChatGeneration,
--> 307 self.generate_prompt(
308 [self._convert_input(input)],
309 stop=stop,
310 callbacks=config.get("callbacks"),
311 tags=config.get("tags"),
312 metadata=config.get("metadata"),
313 run_name=config.get("run_name"),
314 run_id=config.pop("run_id", None),
315 **kwargs,
316 ).generations[0][0],
317 ).message

File ~/mambaforge/envs/elevate/lib/python3.13/site-packages/langchain_core/language_models/chat_models.py:843, in BaseChatModel.generate_prompt(self, prompts, stop, callbacks, **kwargs)
835 def generate_prompt(
836 self,
837 prompts: list[PromptValue],
(...)
840 **kwargs: Any,
841 ) -> LLMResult:
842 prompt_messages = [p.to_messages() for p in prompts]
--> 843 return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)

File ~/mambaforge/envs/elevate/lib/python3.13/site-packages/langchain_core/language_models/chat_models.py:683, in BaseChatModel.generate(self, messages, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)
680 for i, m in enumerate(messages):
681 try:
682 results.append(
--> 683 self._generate_with_cache(
684 m,
685 stop=stop,
686 run_manager=run_managers[i] if run_managers else None,
687 **kwargs,
688 )
689 )
690 except BaseException as e:
691 if run_managers:

File ~/mambaforge/envs/elevate/lib/python3.13/site-packages/langchain_core/language_models/chat_models.py:908, in BaseChatModel._generate_with_cache(self, messages, stop, run_manager, **kwargs)
906 else:
907 if inspect.signature(self._generate).parameters.get("run_manager"):
--> 908 result = self._generate(
909 messages, stop=stop, run_manager=run_manager, **kwargs
910 )
911 else:
912 result = self._generate(messages, stop=stop, **kwargs)

File ~/mambaforge/envs/elevate/lib/python3.13/site-packages/langchain_openai/chat_models/base.py:823, in BaseChatOpenAI._generate(self, messages, stop, run_manager, **kwargs)
821 generation_info = {"headers": dict(raw_response.headers)}
822 else:
--> 823 response = self.client.create(**payload)
824 return self._create_chat_result(response, generation_info)

File ~/mambaforge/envs/elevate/lib/python3.13/site-packages/openai/_utils/_utils.py:279, in required_args..inner..wrapper(*args, **kwargs)
277 msg = f"Missing required argument: {quote(missing[0])}"
278 raise TypeError(msg)
--> 279 return func(*args, **kwargs)

File ~/mambaforge/envs/elevate/lib/python3.13/site-packages/openai/resources/chat/completions/completions.py:879, in Completions.create(self, messages, model, audio, frequency_penalty, function_call, functions, logit_bias, logprobs, max_completion_tokens, max_tokens, metadata, modalities, n, parallel_tool_calls, prediction, presence_penalty, reasoning_effort, response_format, seed, service_tier, stop, store, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
837 @required_args(["messages", "model"], ["messages", "model", "stream"])
838 def create(
839 self,
(...)
876 timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
877 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
878 validate_response_format(response_format)
--> 879 return self._post(
880 "/chat/completions",
881 body=maybe_transform(
882 {
883 "messages": messages,
884 "model": model,
885 "audio": audio,
886 "frequency_penalty": frequency_penalty,
887 "function_call": function_call,
888 "functions": functions,
889 "logit_bias": logit_bias,
890 "logprobs": logprobs,
891 "max_completion_tokens": max_completion_tokens,
892 "max_tokens": max_tokens,
893 "metadata": metadata,
894 "modalities": modalities,
895 "n": n,
896 "parallel_tool_calls": parallel_tool_calls,
897 "prediction": prediction,
898 "presence_penalty": presence_penalty,
899 "reasoning_effort": reasoning_effort,
900 "response_format": response_format,
901 "seed": seed,
902 "service_tier": service_tier,
903 "stop": stop,
904 "store": store,
905 "stream": stream,
906 "stream_options": stream_options,
907 "temperature": temperature,
908 "tool_choice": tool_choice,
909 "tools": tools,
910 "top_logprobs": top_logprobs,
911 "top_p": top_p,
912 "user": user,
913 },
914 completion_create_params.CompletionCreateParams,
915 ),
916 options=make_request_options(
917 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
918 ),
919 cast_to=ChatCompletion,
920 stream=stream or False,
921 stream_cls=Stream[ChatCompletionChunk],
922 )

File ~/mambaforge/envs/elevate/lib/python3.13/site-packages/openai/_base_client.py:1296, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
1282 def post(
1283 self,
1284 path: str,
(...)
1291 stream_cls: type[_StreamT] | None = None,
1292 ) -> ResponseT | _StreamT:
1293 opts = FinalRequestOptions.construct(
1294 method="post", url=path, json_data=body, files=to_httpx_files(files), **options
1295 )
-> 1296 return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))

File ~/mambaforge/envs/elevate/lib/python3.13/site-packages/openai/_base_client.py:973, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
970 else:
971 retries_taken = 0
--> 973 return self._request(
974 cast_to=cast_to,
975 options=options,
976 stream=stream,
977 stream_cls=stream_cls,
978 retries_taken=retries_taken,
979 )

File ~/mambaforge/envs/elevate/lib/python3.13/site-packages/openai/_base_client.py:1077, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
1074 err.response.read()
1076 log.debug("Re-raising status error")
-> 1077 raise self._make_status_error_from_response(err.response) from None
1079 return self._process_response(
1080 cast_to=cast_to,
1081 options=options,
(...)
1085 retries_taken=retries_taken,
1086 )

BadRequestError: Error code: 400 - {'error': {'message': "Invalid type for 'messages[0].content[0]': expected an object, but got a string instead.", 'type': 'invalid_request_error', 'param': 'messages[0].content[0]', 'code': 'invalid_type'}}

Description

OpenAI has PDF support similar to Anthropic's, but it doesn't seem to be working through LangChain's `ChatOpenAI` when using the Anthropic-style `document` content block.

System Info

❯ python -m langchain_core.sys_info

System Information

OS: Darwin
OS Version: Darwin Kernel Version 24.2.0: Fri Dec 6 18:56:34 PST 2024; root:xnu-11215.61.5~2/RELEASE_ARM64_T6020
Python Version: 3.13.2 | packaged by conda-forge | (main, Feb 17 2025, 14:02:48) [Clang 18.1.8 ]

Package Information

langchain_core: 0.3.45
langchain: 0.3.20
langchain_community: 0.3.19
langsmith: 0.3.11
langchain_anthropic: 0.3.3
langchain_aws: 0.2.15
langchain_mcp_adapters: 0.0.3
langchain_ollama: 0.2.2
langchain_openai: 0.3.8
langchain_text_splitters: 0.3.6
langchainhub: 0.1.21
langgraph_sdk: 0.1.53
langgraph_supervisor: 0.0.9
langgraph_swarm: 0.0.5

Optional packages not installed

langserve

Other Dependencies

aiohttp<4.0.0,>=3.8.3: Installed. No version info available.
anthropic: 0.49.0
async-timeout<5.0.0,>=4.0.0;: Installed. No version info available.
boto3: 1.37.13
dataclasses-json<0.7,>=0.5.7: Installed. No version info available.
defusedxml: 0.7.1
httpx: 0.27.0
httpx-sse<1.0.0,>=0.4.0: Installed. No version info available.
jsonpatch<2.0,>=1.33: Installed. No version info available.
langchain-anthropic;: Installed. No version info available.
langchain-aws;: Installed. No version info available.
langchain-cohere;: Installed. No version info available.
langchain-community;: Installed. No version info available.
langchain-core<0.4.0,>=0.3.40: Installed. No version info available.
langchain-core<1.0.0,>=0.3.34: Installed. No version info available.
langchain-core<1.0.0,>=0.3.41: Installed. No version info available.
langchain-core<1.0.0,>=0.3.42: Installed. No version info available.
langchain-core>=0.3.36: Installed. No version info available.
langchain-deepseek;: Installed. No version info available.
langchain-fireworks;: Installed. No version info available.
langchain-google-genai;: Installed. No version info available.
langchain-google-vertexai;: Installed. No version info available.
langchain-groq;: Installed. No version info available.
langchain-huggingface;: Installed. No version info available.
langchain-mistralai;: Installed. No version info available.
langchain-ollama;: Installed. No version info available.
langchain-openai;: Installed. No version info available.
langchain-text-splitters<1.0.0,>=0.3.6: Installed. No version info available.
langchain-together;: Installed. No version info available.
langchain-xai;: Installed. No version info available.
langchain<1.0.0,>=0.3.20: Installed. No version info available.
langgraph-prebuilt<0.2.0,>=0.1.2: Installed. No version info available.
langgraph<0.4.0,>=0.3.5: Installed. No version info available.
langsmith-pyo3: Installed. No version info available.
langsmith<0.4,>=0.1.125: Installed. No version info available.
langsmith<0.4,>=0.1.17: Installed. No version info available.
mcp>=1.2.1: Installed. No version info available.
numpy: 2.2.3
numpy<3,>=1.26.2: Installed. No version info available.
ollama: 0.4.7
openai<2.0.0,>=1.58.1: Installed. No version info available.
orjson: 3.10.15
packaging: 24.2
packaging<25,>=23.2: Installed. No version info available.
pydantic: 2.10.6
pydantic-settings<3.0.0,>=2.4.0: Installed. No version info available.
pydantic<3.0.0,>=2.5.2;: Installed. No version info available.
pydantic<3.0.0,>=2.7.4: Installed. No version info available.
pydantic<3.0.0,>=2.7.4;: Installed. No version info available.
pytest: Installed. No version info available.
PyYAML>=5.3: Installed. No version info available.
requests: 2.32.3
requests-toolbelt: 1.0.0
requests<3,>=2: Installed. No version info available.
rich: 13.9.4
SQLAlchemy<3,>=1.4: Installed. No version info available.
tenacity!=8.4.0,<10,>=8.1.0: Installed. No version info available.
tenacity!=8.4.0,<10.0.0,>=8.1.0: Installed. No version info available.
tiktoken<1,>=0.7: Installed. No version info available.
types-requests: 2.32.0.20250306
typing-extensions>=4.7: Installed. No version info available.
zstandard: 0.23.0

@ccurme
Copy link
Collaborator

ccurme commented Mar 20, 2025

Currently you need to adhere to OpenAI's format (the doc you linked shows an example):

from base64 import b64encode
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
import requests


url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
data = b64encode(requests.get(url).content).decode()
llm = ChatOpenAI(model="gpt-4o")
ai_msg = llm.invoke(
    [
        HumanMessage(
            [
                "Summarize this document.",
                {
                    "type": "file",
                    "file": {
                        "filename": "ooga",
                        "file_data":  f"data:application/pdf;base64,{data}",
                    }
                },
            ]
        )
    ]
)
ai_msg.content

@austinmw
Copy link
Author

Ah sorry thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants