Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions sentry_sdk/integrations/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,15 @@
OllamaEmbeddings = None


def _get_ai_system(all_params: "Dict[str, Any]") -> "Optional[str]":
ai_type = all_params.get("_type")

if not ai_type or not isinstance(ai_type, str):
return None

return ai_type
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raw _type values break gen_ai.system consistency with other integrations

Medium Severity

_get_ai_system returns the raw LangChain _type value (e.g. "anthropic-chat", "openai-chat") instead of a normalized provider name. The Anthropic integration sets gen_ai.system to "anthropic" and the OpenAI integration uses "openai", matching OTel semantic conventions. The old code also normalized to these values. Now the same provider gets different gen_ai.system values depending on whether it's called directly or through LangChain, breaking filtering/grouping in the Sentry UI for existing users.

Additional Locations (1)
Fix in Cursor Fix in Web

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is intentional, see #5707 (comment)



DATA_FIELDS = {
"frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
"function_call": SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
Expand Down Expand Up @@ -380,11 +389,9 @@
model,
)

ai_type = all_params.get("_type", "")
if "anthropic" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "anthropic")
elif "openai" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai")
ai_system = _get_ai_system(all_params)
if ai_system:
span.set_data(SPANDATA.GEN_AI_SYSTEM, ai_system)

for key, attribute in DATA_FIELDS.items():
if key in all_params and all_params[key] is not None:
Expand Down Expand Up @@ -448,11 +455,9 @@
if model:
span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model)

ai_type = all_params.get("_type", "")
if "anthropic" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "anthropic")
elif "openai" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai")
ai_system = _get_ai_system(all_params)
if ai_system:
span.set_data(SPANDATA.GEN_AI_SYSTEM, ai_system)

Check warning on line 460 in sentry_sdk/integrations/langchain.py

View check run for this annotation

@sentry/warden / warden: code-review

Breaking change in gen_ai.system values without migration guidance

The refactored `_get_ai_system()` now returns raw LangChain `_type` values (e.g., "openai-chat", "anthropic-chat") instead of normalized provider names ("openai", "anthropic"). This breaks backwards compatibility for users querying Sentry data with filters like `gen_ai.system:openai`. Additionally, there's now inconsistency across integrations: direct OpenAI integration reports "openai" while LangChain+OpenAI reports "openai-chat".

agent_name = _get_current_agent()
if agent_name:
Expand Down
92 changes: 92 additions & 0 deletions tests/integrations/langchain/test_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def test_langchain_text_completion(

llm_span = llm_spans[0]
assert llm_span["description"] == "generate_text gpt-3.5-turbo"
assert llm_span["data"]["gen_ai.system"] == "openai"
assert llm_span["data"]["gen_ai.request.model"] == "gpt-3.5-turbo"
assert llm_span["data"]["gen_ai.response.text"] == "The capital of France is Paris."
assert llm_span["data"]["gen_ai.usage.total_tokens"] == 25
Expand Down Expand Up @@ -254,6 +255,7 @@ def test_langchain_create_agent(
assert len(chat_spans) == 1
assert chat_spans[0]["origin"] == "auto.ai.langchain"

assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat"
assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 10
assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 20
assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 30
Expand Down Expand Up @@ -413,10 +415,12 @@ def test_tool_execution_span(
assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142
assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50
assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192
assert chat_spans[0]["data"]["gen_ai.system"] == "openai-chat"

assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89
assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28
assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117
assert chat_spans[1]["data"]["gen_ai.system"] == "openai-chat"

if send_default_pii and include_prompts:
assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT]
Expand Down Expand Up @@ -2226,6 +2230,94 @@ def test_transform_google_file_data(self):
}


@pytest.mark.parametrize(
    "ai_type,expected_system",
    [
        # Real LangChain _type values (from _llm_type properties)
        # OpenAI
        ("openai-chat", "openai-chat"),
        ("openai", "openai"),
        # Azure OpenAI
        ("azure-openai-chat", "azure-openai-chat"),
        ("azure", "azure"),
        # Anthropic
        ("anthropic-chat", "anthropic-chat"),
        # Google
        ("vertexai", "vertexai"),
        ("chat-google-generative-ai", "chat-google-generative-ai"),
        ("google_gemini", "google_gemini"),
        # AWS Bedrock
        ("amazon_bedrock_chat", "amazon_bedrock_chat"),
        ("amazon_bedrock", "amazon_bedrock"),
        # Cohere
        ("cohere-chat", "cohere-chat"),
        # Ollama
        ("chat-ollama", "chat-ollama"),
        ("ollama-llm", "ollama-llm"),
        # Mistral
        ("mistralai-chat", "mistralai-chat"),
        # Fireworks
        ("fireworks-chat", "fireworks-chat"),
        ("fireworks", "fireworks"),
        # HuggingFace
        ("huggingface-chat-wrapper", "huggingface-chat-wrapper"),
        # Groq
        ("groq-chat", "groq-chat"),
        # NVIDIA
        ("chat-nvidia-ai-playground", "chat-nvidia-ai-playground"),
        # xAI
        ("xai-chat", "xai-chat"),
        # DeepSeek
        ("chat-deepseek", "chat-deepseek"),
        # Edge cases
        ("", None),
        (None, None),
    ],
)
def test_langchain_ai_system_detection(
    sentry_init, capture_events, ai_type, expected_system
):
    """Verify gen_ai.system span data mirrors the raw LangChain ``_type``.

    Each parametrized case expects the *unnormalized* ``_type`` value to be
    set as ``gen_ai.system`` (an intentional design choice on this PR);
    empty/missing ``_type`` must leave the attribute unset.
    """
    sentry_init(
        integrations=[LangchainIntegration()],
        traces_sample_rate=1.0,
    )
    events = capture_events()

    callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True)

    run_id = "test-ai-system-uuid"
    # When ai_type is None, omit the key entirely to exercise the
    # missing-key path rather than an explicit None value.
    serialized = {"_type": ai_type} if ai_type is not None else {}
    prompts = ["Test prompt"]

    with start_transaction():
        callback.on_llm_start(
            serialized=serialized,
            prompts=prompts,
            run_id=run_id,
            invocation_params={"_type": ai_type, "model": "test-model"},
        )

        # Minimal mock of a LangChain LLMResult so on_llm_end can close
        # the span started above.
        generation = Mock(text="Test response", message=None)
        response = Mock(generations=[[generation]])
        callback.on_llm_end(response=response, run_id=run_id)

    assert len(events) > 0
    tx = events[0]
    assert tx["type"] == "transaction"

    llm_spans = [
        span for span in tx.get("spans", []) if span.get("op") == "gen_ai.generate_text"
    ]
    assert len(llm_spans) > 0

    llm_span = llm_spans[0]

    # Raw _type passes through verbatim; falsy _type yields no attribute.
    if expected_system is not None:
        assert llm_span["data"][SPANDATA.GEN_AI_SYSTEM] == expected_system
    else:
        assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("data", {})


class TestTransformLangchainMessageContent:
"""Tests for _transform_langchain_message_content function."""

Expand Down
Loading