Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 87 additions & 21 deletions posthog/ai/gemini/gemini_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,35 +29,76 @@ class GeminiMessage(TypedDict, total=False):
text: str


def _extract_text_from_parts(parts: List[Any]) -> str:
def _format_parts_as_content_blocks(parts: List[Any]) -> List[FormattedContentItem]:
"""
Extract and concatenate text from a parts array.
Format Gemini parts array into structured content blocks.

Preserves structure for multimodal content (text + images) instead of
concatenating everything into a string.

Args:
parts: List of parts that may contain text content
parts: List of parts that may contain text, inline_data, etc.

Returns:
Concatenated text from all parts
List of formatted content blocks
"""

content_parts = []
content_blocks: List[FormattedContentItem] = []

for part in parts:
# Handle dict with text field
if isinstance(part, dict) and "text" in part:
content_parts.append(part["text"])
content_blocks.append({"type": "text", "text": part["text"]})

# Handle string parts
elif isinstance(part, str):
content_parts.append(part)
content_blocks.append({"type": "text", "text": part})

# Handle dict with inline_data (images, documents, etc.)
elif isinstance(part, dict) and "inline_data" in part:
inline_data = part["inline_data"]
mime_type = inline_data.get("mime_type", "")
content_type = "image" if mime_type.startswith("image/") else "document"

content_blocks.append(
{
"type": content_type,
"inline_data": inline_data,
}
)

# Handle object with text attribute
elif hasattr(part, "text"):
# Get the text attribute value
text_value = getattr(part, "text", "")
content_parts.append(text_value if text_value else str(part))

else:
content_parts.append(str(part))
if text_value:
content_blocks.append({"type": "text", "text": text_value})

# Handle object with inline_data attribute
elif hasattr(part, "inline_data"):
inline_data = part.inline_data
# Convert to dict if needed
if hasattr(inline_data, "mime_type") and hasattr(inline_data, "data"):
# Determine type based on mime_type
mime_type = inline_data.mime_type
content_type = "image" if mime_type.startswith("image/") else "document"

content_blocks.append(
{
"type": content_type,
"inline_data": {
"mime_type": mime_type,
"data": inline_data.data,
},
}
)
else:
content_blocks.append(
{
"type": "image",
"inline_data": inline_data,
}
)

return "".join(content_parts)
return content_blocks


def _format_dict_message(item: Dict[str, Any]) -> FormattedMessage:
Expand All @@ -73,16 +114,17 @@ def _format_dict_message(item: Dict[str, Any]) -> FormattedMessage:

# Handle dict format with parts array (Gemini-specific format)
if "parts" in item and isinstance(item["parts"], list):
content = _extract_text_from_parts(item["parts"])
return {"role": item.get("role", "user"), "content": content}
content_blocks = _format_parts_as_content_blocks(item["parts"])
return {"role": item.get("role", "user"), "content": content_blocks}

# Handle dict with content field
if "content" in item:
content = item["content"]

if isinstance(content, list):
# If content is a list, extract text from it
content = _extract_text_from_parts(content)
# If content is a list, format it as content blocks
content_blocks = _format_parts_as_content_blocks(content)
return {"role": item.get("role", "user"), "content": content_blocks}

elif not isinstance(content, str):
content = str(content)
Expand Down Expand Up @@ -110,14 +152,14 @@ def _format_object_message(item: Any) -> FormattedMessage:

# Handle object with parts attribute
if hasattr(item, "parts") and hasattr(item.parts, "__iter__"):
content = _extract_text_from_parts(item.parts)
content_blocks = _format_parts_as_content_blocks(list(item.parts))
role = getattr(item, "role", "user") if hasattr(item, "role") else "user"

# Ensure role is a string
if not isinstance(role, str):
role = "user"

return {"role": role, "content": content}
return {"role": role, "content": content_blocks}

# Handle object with text attribute
if hasattr(item, "text"):
Expand All @@ -140,7 +182,8 @@ def _format_object_message(item: Any) -> FormattedMessage:
content = item.content

if isinstance(content, list):
content = _extract_text_from_parts(content)
content_blocks = _format_parts_as_content_blocks(content)
return {"role": role, "content": content_blocks}

elif not isinstance(content, str):
content = str(content)
Expand Down Expand Up @@ -193,6 +236,29 @@ def format_gemini_response(response: Any) -> List[FormattedMessage]:
}
)

elif hasattr(part, "inline_data") and part.inline_data:
# Handle audio/media inline data
import base64

inline_data = part.inline_data
mime_type = getattr(inline_data, "mime_type", "audio/pcm")
raw_data = getattr(inline_data, "data", b"")

# Encode binary data as base64 string for JSON serialization
if isinstance(raw_data, bytes):
data = base64.b64encode(raw_data).decode("utf-8")
else:
# Already a string (base64)
data = raw_data

content.append(
{
"type": "audio",
"mime_type": mime_type,
"data": data,
}
)

if content:
output.append(
{
Expand Down
6 changes: 6 additions & 0 deletions posthog/ai/openai/openai_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ def format_openai_response(response: Any) -> List[FormattedMessage]:
}
)

# Handle audio output (gpt-4o-audio-preview)
if hasattr(choice.message, "audio") and choice.message.audio:
# Convert Pydantic model to dict to capture all fields from OpenAI
audio_dict = choice.message.audio.model_dump()
content.append({"type": "audio", **audio_dict})

if content:
output.append(
{
Expand Down
32 changes: 27 additions & 5 deletions posthog/ai/sanitization.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
import os
import re
from typing import Any
from urllib.parse import urlparse

REDACTED_IMAGE_PLACEHOLDER = "[base64 image redacted]"


def _is_multimodal_enabled() -> bool:
"""Check if multimodal capture is enabled via environment variable."""
return os.environ.get("_INTERNAL_LLMA_MULTIMODAL", "").lower() in (
"true",
"1",
"yes",
)


def is_base64_data_url(text: str) -> bool:
    """Return True if *text* begins with a ``data:<mime>;base64,`` URL header."""
    match = re.match(r"^data:([^;]+);base64,", text)
    return match is not None

Expand All @@ -27,6 +37,9 @@ def is_raw_base64(text: str) -> bool:


def redact_base64_data_url(value: Any) -> Any:
if _is_multimodal_enabled():
return value

if not isinstance(value, str):
return value

Expand Down Expand Up @@ -83,6 +96,11 @@ def sanitize_openai_image(item: Any) -> Any:
},
}

if item.get("type") == "audio" and "data" in item:
if _is_multimodal_enabled():
return item
return {**item, "data": REDACTED_IMAGE_PLACEHOLDER}

return item


Expand All @@ -100,6 +118,9 @@ def sanitize_openai_response_image(item: Any) -> Any:


def sanitize_anthropic_image(item: Any) -> Any:
if _is_multimodal_enabled():
return item

if not isinstance(item, dict):
return item

Expand All @@ -109,8 +130,6 @@ def sanitize_anthropic_image(item: Any) -> Any:
and item["source"].get("type") == "base64"
and "data" in item["source"]
):
# For Anthropic, if the source type is "base64", we should always redact the data
# The provider is explicitly telling us this is base64 data
return {
**item,
"source": {
Expand All @@ -123,6 +142,9 @@ def sanitize_anthropic_image(item: Any) -> Any:


def sanitize_gemini_part(part: Any) -> Any:
if _is_multimodal_enabled():
return part

if not isinstance(part, dict):
return part

Expand All @@ -131,8 +153,6 @@ def sanitize_gemini_part(part: Any) -> Any:
and isinstance(part["inline_data"], dict)
and "data" in part["inline_data"]
):
# For Gemini, the inline_data structure indicates base64 data
# We should redact any string data in this context
return {
**part,
"inline_data": {
Expand Down Expand Up @@ -185,7 +205,9 @@ def sanitize_langchain_image(item: Any) -> Any:
and isinstance(item.get("source"), dict)
and "data" in item["source"]
):
# Anthropic style - raw base64 in structured format, always redact
if _is_multimodal_enabled():
return item

return {
**item,
"source": {
Expand Down
14 changes: 12 additions & 2 deletions posthog/test/ai/gemini/test_gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,9 @@ def test_new_client_different_input_formats(
)
call_args = mock_client.capture.call_args[1]
props = call_args["properties"]
assert props["$ai_input"] == [{"role": "user", "content": "hey"}]
assert props["$ai_input"] == [
{"role": "user", "content": [{"type": "text", "text": "hey"}]}
]

# Test multiple parts in the parts array
mock_client.reset_mock()
Expand All @@ -418,7 +420,15 @@ def test_new_client_different_input_formats(
)
call_args = mock_client.capture.call_args[1]
props = call_args["properties"]
assert props["$ai_input"] == [{"role": "user", "content": "Hello world"}]
assert props["$ai_input"] == [
{
"role": "user",
"content": [
{"type": "text", "text": "Hello "},
{"type": "text", "text": "world"},
],
}
]

# Test list input with string
mock_client.capture.reset_mock()
Expand Down
14 changes: 12 additions & 2 deletions posthog/test/ai/gemini/test_gemini_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,9 @@ async def test_async_client_different_input_formats(
)
call_args = mock_client.capture.call_args[1]
props = call_args["properties"]
assert props["$ai_input"] == [{"role": "user", "content": "hey"}]
assert props["$ai_input"] == [
{"role": "user", "content": [{"type": "text", "text": "hey"}]}
]

# Test multiple parts in the parts array
mock_client.reset_mock()
Expand All @@ -403,7 +405,15 @@ async def test_async_client_different_input_formats(
)
call_args = mock_client.capture.call_args[1]
props = call_args["properties"]
assert props["$ai_input"] == [{"role": "user", "content": "Hello world"}]
assert props["$ai_input"] == [
{
"role": "user",
"content": [
{"type": "text", "text": "Hello "},
{"type": "text", "text": "world"},
],
}
]

# Test list input with string
mock_client.capture.reset_mock()
Expand Down
Loading
Loading