Skip to content
44 changes: 37 additions & 7 deletions pydantic_ai_slim/pydantic_ai/models/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
BuiltinToolReturnPart,
FilePart,
FileUrl,
DocumentUrl,
FinishReason,
ModelMessage,
ModelRequest,
Expand Down Expand Up @@ -90,7 +91,7 @@
'Please install `google-genai` to use the Google model, '
'you can use the `google` optional group — `pip install "pydantic-ai-slim[google]"`'
) from _import_error

LatestGoogleModelNames = Literal[
'gemini-2.0-flash',
'gemini-2.0-flash-lite',
Expand Down Expand Up @@ -567,17 +568,34 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
if isinstance(item, str):
content.append({'text': item})
elif isinstance(item, BinaryContent):
inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type}
part_dict: PartDict = {'inline_data': inline_data_dict}
if item.vendor_metadata:
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)
if self._is_text_like_media_type(item.media_type):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need tests for this behavior like we have in test_openai.py

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check the function test_google_model_json_document_url_input in test_google.py. That should work

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to use the same _inline_text_file_part we use in OpenAI, so that the text is properly formatted as representing a file.

I suggest moving it to a method on BinaryContent that returns the text with the fencing.

_is_text_like_media_type can become a method on BinaryContent and DocumentUrl as well.

When we check isinstance(item, DocumentUrl) and then do downloaded_text = await download_item(item, data_format='text'), we can create a BinaryContent from the result of download_item, and then call the new inline_text_file method on it.

content.append({'text': item.data.decode('utf-8')})
else:
inline_data_dict: BlobDict = {'data': item.data, 'mime_type': item.media_type}
part_dict: PartDict = {'inline_data': inline_data_dict}
if item.vendor_metadata:
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)

elif isinstance(item, DocumentUrl):
if self._is_text_like_media_type(item.media_type):
downloaded_text = await download_item(item, data_format='text')
content.append({'text': downloaded_text['data']})
else:
downloaded_item = await download_item(item, data_format='bytes')
inline_data_dict: BlobDict = {
'data': downloaded_item['data'],
'mime_type': downloaded_item['data_type'],
}
content.append({'inline_data': inline_data_dict})

elif isinstance(item, VideoUrl) and item.is_youtube:
file_data_dict: FileDataDict = {'file_uri': item.url, 'mime_type': item.media_type}
part_dict: PartDict = {'file_data': file_data_dict}
if item.vendor_metadata: # pragma: no branch
part_dict['video_metadata'] = cast(VideoMetadataDict, item.vendor_metadata)
content.append(part_dict)

elif isinstance(item, FileUrl):
if item.force_download or (
# google-gla does not support passing file urls directly, except for youtube videos
Expand All @@ -596,7 +614,19 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
content.append({'file_data': file_data_dict}) # pragma: lax no cover
else:
assert_never(item)
return content

return content

@staticmethod
def _is_text_like_media_type(media_type: str) -> bool:
return (
media_type.startswith('text/')
or media_type == 'application/json'
or media_type.endswith('+json')
or media_type == 'application/xml'
or media_type.endswith('+xml')
or media_type in ('application/x-yaml', 'application/yaml')
)

def _map_response_schema(self, o: OutputObjectDefinition) -> dict[str, Any]:
response_schema = o.json_schema.copy()
Expand Down
10 changes: 10 additions & 0 deletions tests/models/test_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,16 @@ async def test_google_model_text_document_url_input(allow_model_requests: None,
'The main content of the TXT file is an explanation of the placeholder name "John Doe" (and related variations) and its usage in legal contexts, popular culture, and other situations where the identity of a person is unknown or needs to be withheld. The document also includes the purpose of the file and other file type information.\n'
)

async def test_google_model_json_document_url_input(allow_model_requests: None, google_provider: GoogleProvider):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The VCR cassette will be generated automatically when you call uv run pytest <path> --record-mode=rewrite, is that what you did?

m = GoogleModel('gemini-2.5-pro', provider=google_provider)
agent = Agent(m, system_prompt='You are a helpful chatbot.')

json_document_url = DocumentUrl(url='https://kamalscraping-collab.github.io/sample-data/sample_transcript.json')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we please use a different public JSON file that's not dependent on your repo?


result = await agent.run(['What is the main content of this document?', json_document_url])
assert result.output == snapshot(
"Based on the JSON data provided, the document contains the log of a conversation between a user and an AI assistant.\n"
)

async def test_google_model_text_as_binary_content_input(allow_model_requests: None, google_provider: GoogleProvider):
m = GoogleModel('gemini-2.0-flash', provider=google_provider)
Expand Down
Loading