Skip to content

Commit 253a4d4

Browse files
feat: Add option to send GenAI spans in the new span format (#6079)
Add the `stream_gen_ai_spans` option. Convert any child span with an `op` starting with `gen_ai.` to the new span format if the option is enabled. Send a V2 span container in the same envelope as the transaction containing `gen_ai.*` spans, if applicable. Parametrize tests on the new option.
1 parent 619d803 commit 253a4d4

15 files changed

Lines changed: 20203 additions & 7819 deletions

File tree

sentry_sdk/_types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ class SDKInfo(TypedDict):
242242
"type": Literal["check_in", "transaction"],
243243
"user": dict[str, object],
244244
"_dropped_spans": int,
245+
"_has_gen_ai_span": bool,
245246
},
246247
total=False,
247248
)

sentry_sdk/client.py

Lines changed: 243 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
import uuid
33
import random
44
import socket
5-
from collections.abc import Mapping
5+
from collections.abc import Mapping, Iterable
66
from datetime import datetime, timezone
77
from importlib import import_module
88
from typing import TYPE_CHECKING, List, Dict, cast, overload
99
import warnings
10+
import json
1011

1112
from sentry_sdk._compat import check_uwsgi_thread_support
1213
from sentry_sdk._metrics_batcher import MetricsBatcher
@@ -30,6 +31,7 @@
3031
)
3132
from sentry_sdk.serializer import serialize
3233
from sentry_sdk.tracing import trace
34+
from sentry_sdk.traces import SpanStatus
3335
from sentry_sdk.tracing_utils import has_span_streaming_enabled
3436
from sentry_sdk.transport import (
3537
HttpTransportCore,
@@ -38,6 +40,7 @@
3840
)
3941
from sentry_sdk.consts import (
4042
SPANDATA,
43+
SPANSTATUS,
4144
DEFAULT_MAX_VALUE_LENGTH,
4245
DEFAULT_OPTIONS,
4346
INSTRUMENTER,
@@ -47,7 +50,7 @@
4750
from sentry_sdk.integrations import _DEFAULT_INTEGRATIONS, setup_integrations
4851
from sentry_sdk.integrations.dedupe import DedupeIntegration
4952
from sentry_sdk.sessions import SessionFlusher
50-
from sentry_sdk.envelope import Envelope
53+
from sentry_sdk.envelope import Envelope, Item, PayloadRef
5154
from sentry_sdk.profiler.continuous_profiler import setup_continuous_profiler
5255
from sentry_sdk.profiler.transaction_profiler import (
5356
has_profiling_enabled,
@@ -56,6 +59,7 @@
5659
)
5760
from sentry_sdk.scrubber import EventScrubber
5861
from sentry_sdk.monitor import Monitor
62+
from sentry_sdk.utils import datetime_from_isoformat
5963

6064
if TYPE_CHECKING:
6165
from typing import Any
@@ -66,7 +70,15 @@
6670
from typing import Union
6771
from typing import TypeVar
6872

69-
from sentry_sdk._types import Event, Hint, SDKInfo, Log, Metric, EventDataCategory
73+
from sentry_sdk._types import (
74+
Event,
75+
Hint,
76+
SDKInfo,
77+
Log,
78+
Metric,
79+
EventDataCategory,
80+
SerializedAttributeValue,
81+
)
7082
from sentry_sdk.integrations import Integration
7183
from sentry_sdk.scope import Scope
7284
from sentry_sdk.session import Session
@@ -89,6 +101,196 @@
89101
}
90102

91103

104+
def _serialized_v1_attribute_to_serialized_v2_attribute(
105+
attribute_value: "Any",
106+
) -> "Optional[SerializedAttributeValue]":
107+
if isinstance(attribute_value, bool):
108+
return {
109+
"value": attribute_value,
110+
"type": "boolean",
111+
}
112+
113+
if isinstance(attribute_value, int):
114+
return {
115+
"value": attribute_value,
116+
"type": "integer",
117+
}
118+
119+
if isinstance(attribute_value, float):
120+
return {
121+
"value": attribute_value,
122+
"type": "double",
123+
}
124+
125+
if isinstance(attribute_value, str):
126+
return {
127+
"value": attribute_value,
128+
"type": "string",
129+
}
130+
131+
if isinstance(attribute_value, list):
132+
if not attribute_value:
133+
return {"value": [], "type": "array"}
134+
135+
ty = type(attribute_value[0])
136+
if ty in (int, str, bool, float) and all(
137+
type(v) is ty for v in attribute_value
138+
):
139+
return {
140+
"value": attribute_value,
141+
"type": "array",
142+
}
143+
144+
# Types returned when the serializer for V1 span attributes recurses into some container types.
145+
if isinstance(attribute_value, (dict, list)):
146+
return {
147+
"value": json.dumps(attribute_value),
148+
"type": "string",
149+
}
150+
151+
return None
152+
153+
154+
def _serialized_v1_span_to_serialized_v2_span(
155+
span: "dict[str, Any]", event: "Event"
156+
) -> "dict[str, Any]":
157+
# See SpanBatcher._to_transport_format() for analogous population of all entries except "attributes".
158+
res: "dict[str, Any]" = {
159+
"status": SpanStatus.OK.value,
160+
"is_segment": False,
161+
}
162+
163+
if "trace_id" in span:
164+
res["trace_id"] = span["trace_id"]
165+
166+
if "span_id" in span:
167+
res["span_id"] = span["span_id"]
168+
169+
if "description" in span:
170+
description = span["description"]
171+
172+
if description is None and "op" in span:
173+
description = span["op"]
174+
175+
res["name"] = description
176+
177+
if "start_timestamp" in span:
178+
start_timestamp = None
179+
try:
180+
start_timestamp = datetime_from_isoformat(span["start_timestamp"])
181+
except Exception:
182+
pass
183+
184+
if start_timestamp is not None:
185+
res["start_timestamp"] = start_timestamp.timestamp()
186+
187+
if "timestamp" in span:
188+
end_timestamp = None
189+
try:
190+
end_timestamp = datetime_from_isoformat(span["timestamp"])
191+
except Exception:
192+
pass
193+
194+
if end_timestamp is not None:
195+
res["end_timestamp"] = end_timestamp.timestamp()
196+
197+
if "parent_span_id" in span:
198+
res["parent_span_id"] = span["parent_span_id"]
199+
200+
if "status" in span and span["status"] != SPANSTATUS.OK:
201+
res["status"] = "error"
202+
203+
attributes: "Dict[str, Any]" = {}
204+
205+
if "op" in span:
206+
attributes["sentry.op"] = span["op"]
207+
if "origin" in span:
208+
attributes["sentry.origin"] = span["origin"]
209+
210+
span_data = span.get("data")
211+
if isinstance(span_data, dict):
212+
attributes.update(span_data)
213+
214+
span_tags = span.get("tags")
215+
if isinstance(span_tags, dict):
216+
attributes.update(span_tags)
217+
218+
# See Scope._apply_user_attributes_to_telemetry() for user attributes.
219+
user = event.get("user")
220+
if isinstance(user, dict):
221+
if "id" in user:
222+
attributes["user.id"] = user["id"]
223+
if "username" in user:
224+
attributes["user.name"] = user["username"]
225+
if "email" in user:
226+
attributes["user.email"] = user["email"]
227+
228+
# See Scope.set_global_attributes() for release, environment, and SDK metadata.
229+
if "release" in event:
230+
attributes["sentry.release"] = event["release"]
231+
if "environment" in event:
232+
attributes["sentry.environment"] = event["environment"]
233+
if "transaction" in event:
234+
attributes["sentry.segment.name"] = event["transaction"]
235+
236+
trace_context = event.get("contexts", {}).get("trace", {})
237+
if "span_id" in trace_context:
238+
attributes["sentry.segment.id"] = trace_context["span_id"]
239+
240+
sdk_info = event.get("sdk")
241+
if isinstance(sdk_info, dict):
242+
if "name" in sdk_info:
243+
attributes["sentry.sdk.name"] = sdk_info["name"]
244+
if "version" in sdk_info:
245+
attributes["sentry.sdk.version"] = sdk_info["version"]
246+
247+
if not attributes:
248+
return res
249+
250+
res["attributes"] = {}
251+
for key, value in attributes.items():
252+
converted_value = _serialized_v1_attribute_to_serialized_v2_attribute(value)
253+
if converted_value is None:
254+
continue
255+
256+
res["attributes"][key] = converted_value
257+
258+
# Remove redundant attribute, as status is stored in the status field.
259+
if "status" in res["attributes"]:
260+
del res["attributes"]["status"]
261+
262+
return res
263+
264+
265+
def _split_gen_ai_spans(
266+
event_opt: "Event",
267+
) -> "Optional[tuple[List[Dict[str, object]], List[Dict[str, object]]]]":
268+
if "spans" not in event_opt:
269+
return None
270+
271+
spans: "Any" = event_opt["spans"]
272+
if isinstance(spans, AnnotatedValue):
273+
spans = spans.value
274+
275+
if not isinstance(spans, Iterable):
276+
return None
277+
278+
non_gen_ai_spans = []
279+
gen_ai_spans = []
280+
for span in spans:
281+
if not isinstance(span, dict):
282+
non_gen_ai_spans.append(span)
283+
continue
284+
285+
span_op = span.get("op")
286+
if isinstance(span_op, str) and span_op.startswith("gen_ai."):
287+
gen_ai_spans.append(span)
288+
else:
289+
non_gen_ai_spans.append(span)
290+
291+
return non_gen_ai_spans, gen_ai_spans
292+
293+
92294
def _get_options(*args: "Optional[str]", **kwargs: "Any") -> "Dict[str, Any]":
93295
if args and (isinstance(args[0], (bytes, str)) or args[0] is None):
94296
dsn: "Optional[str]" = args[0]
@@ -874,6 +1076,8 @@ def capture_event(
8741076
event_id = event.get("event_id")
8751077
if event_id is None:
8761078
event["event_id"] = event_id = uuid.uuid4().hex
1079+
1080+
span_recorder_has_gen_ai_span = event.pop("_has_gen_ai_span", False)
8771081
event_opt = self._prepare_event(event, hint, scope)
8781082
if event_opt is None:
8791083
return None
@@ -909,10 +1113,43 @@ def capture_event(
9091113

9101114
envelope = Envelope(headers=headers)
9111115

912-
if is_transaction:
913-
if isinstance(profile, Profile):
914-
envelope.add_profile(profile.to_json(event_opt, self.options))
1116+
if is_transaction and isinstance(profile, Profile):
1117+
envelope.add_profile(profile.to_json(event_opt, self.options))
1118+
1119+
if is_transaction and not span_recorder_has_gen_ai_span:
9151120
envelope.add_transaction(event_opt)
1121+
elif is_transaction:
1122+
split_spans = _split_gen_ai_spans(event_opt)
1123+
if split_spans is None or not split_spans[1]:
1124+
envelope.add_transaction(event_opt)
1125+
else:
1126+
non_gen_ai_spans, gen_ai_spans = split_spans
1127+
1128+
event_opt["spans"] = non_gen_ai_spans
1129+
envelope.add_transaction(event_opt)
1130+
1131+
converted_gen_ai_spans = [
1132+
_serialized_v1_span_to_serialized_v2_span(span, event_opt)
1133+
for span in gen_ai_spans
1134+
if isinstance(span, dict)
1135+
]
1136+
1137+
envelope.add_item(
1138+
Item(
1139+
type=SpanBatcher.TYPE,
1140+
content_type=SpanBatcher.CONTENT_TYPE,
1141+
headers={
1142+
"item_count": len(converted_gen_ai_spans),
1143+
},
1144+
payload=PayloadRef(
1145+
json={
1146+
"version": 2,
1147+
"items": converted_gen_ai_spans,
1148+
},
1149+
),
1150+
)
1151+
)
1152+
9161153
elif is_checkin:
9171154
envelope.add_checkin(event_opt)
9181155
else:

sentry_sdk/consts.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,6 +1218,7 @@ def __init__(
12181218
before_send_metric: "Optional[Callable[[Metric, Hint], Optional[Metric]]]" = None,
12191219
org_id: "Optional[str]" = None,
12201220
strict_trace_continuation: bool = False,
1221+
stream_gen_ai_spans: bool = False,
12211222
) -> None:
12221223
"""Initialize the Sentry SDK with the given parameters. All parameters described here can be used in a call to `sentry_sdk.init()`.
12231224
@@ -1633,6 +1634,9 @@ def __init__(
16331634
but you can provide it explicitly for self-hosted and Relay setups. This value is used for
16341635
trace propagation and for features like `strict_trace_continuation`.
16351636
1637+
:param stream_gen_ai_spans: When set, generative AI spans are sent in a new transport format to
1638+
reduce downstream data loss.
1639+
16361640
:param _experiments:
16371641
"""
16381642
pass

sentry_sdk/tracing.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,11 +1040,23 @@ def finish(
10401040

10411041
return None
10421042

1043-
finished_spans = [
1044-
span.to_json()
1045-
for span in self._span_recorder.spans
1046-
if span.timestamp is not None
1047-
]
1043+
finished_spans = []
1044+
has_gen_ai_span = False
1045+
if client.options.get("stream_gen_ai_spans", False):
1046+
for span in self._span_recorder.spans:
1047+
if span.timestamp is None:
1048+
continue
1049+
1050+
if isinstance(span.op, str) and span.op.startswith("gen_ai."):
1051+
has_gen_ai_span = True
1052+
1053+
finished_spans.append(span.to_json())
1054+
else:
1055+
finished_spans = [
1056+
span.to_json()
1057+
for span in self._span_recorder.spans
1058+
if span.timestamp is not None
1059+
]
10481060

10491061
len_diff = len(self._span_recorder.spans) - len(finished_spans)
10501062
dropped_spans = len_diff + self._span_recorder.dropped_spans
@@ -1076,6 +1088,9 @@ def finish(
10761088
if dropped_spans > 0:
10771089
event["_dropped_spans"] = dropped_spans
10781090

1091+
if has_gen_ai_span:
1092+
event["_has_gen_ai_span"] = True
1093+
10791094
if self._profile is not None and self._profile.valid():
10801095
event["profile"] = self._profile
10811096
self._profile = None

0 commit comments

Comments
 (0)