Skip to content

Commit 64b6965

Browse files
authored
feat(wsgi,asgi): Introduce substitute values for filtered fields in span-streaming mode (#6178)
In the legacy (event-based) pipeline, filtered/removed values in request data are represented as `AnnotatedValue` objects — wrappers that carry metadata (`rem` remarks) consumed by Relay during ingestion. In span-streaming mode, request attributes are plain strings sent directly on spans, so `AnnotatedValue` is not appropriate there.

Previously, `_filter_headers` had a `use_annotated_value: bool` parameter that callers set to `False` when building span attributes. This was fragile — callers had to know the right value, and the logic was split between call sites instead of living in one place.

This PR:

- Adds two new substitute string constants (`OVER_SIZE_LIMIT_SUBSTITUTE`, `UNPARSABLE_RAW_DATA_SUBSTITUTE`) for use in the span-streaming path.
- Adds two new `AnnotatedValue` factory methods (`substituted_because_raw_data`, `substituted_because_over_size_limit`) that use remark type `"s"` (substituted) rather than `"x"` (removed), for future use as the SDK fully transitions to span-first.

Refs GH-6175

Fixes PY-2396
1 parent 3dacbf2 commit 64b6965

3 files changed

Lines changed: 51 additions & 16 deletions

File tree

sentry_sdk/_types.py

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@
66

77
from typing import TYPE_CHECKING, TypeVar, Union
88

9-
109
# Re-exported for compat, since code out there in the wild might use this variable.
1110
MYPY = TYPE_CHECKING
1211

1312

1413
SENSITIVE_DATA_SUBSTITUTE = "[Filtered]"
1514
BLOB_DATA_SUBSTITUTE = "[Blob substitute]"
15+
OVER_SIZE_LIMIT_SUBSTITUTE = "[Exceeds maximum size]"
16+
UNPARSABLE_RAW_DATA_SUBSTITUTE = "[Unparsable]"
1617

1718

1819
class AnnotatedValue:
@@ -47,6 +48,8 @@ def __len__(self: "AnnotatedValue") -> int:
4748
@classmethod
4849
def removed_because_raw_data(cls) -> "AnnotatedValue":
4950
"""The value was removed because it could not be parsed. This is done for request body values that are not json nor a form."""
51+
# This is the legacy approach - we want to transition over to `substituted_because_raw_data` after we completely transition
52+
# to span-first
5053
return AnnotatedValue(
5154
value="",
5255
metadata={
@@ -59,12 +62,29 @@ def removed_because_raw_data(cls) -> "AnnotatedValue":
5962
},
6063
)
6164

65+
@classmethod
66+
def substituted_because_raw_data(cls) -> "AnnotatedValue":
67+
"""The value was replaced because it could not be parsed. This is done for request body values that are not json nor a form."""
68+
return AnnotatedValue(
69+
value=UNPARSABLE_RAW_DATA_SUBSTITUTE,
70+
metadata={
71+
"rem": [ # Remark
72+
[
73+
"!raw", # Unparsable raw data
74+
"s", # The fields original value was substituted
75+
]
76+
]
77+
},
78+
)
79+
6280
@classmethod
6381
def removed_because_over_size_limit(cls, value: "Any" = "") -> "AnnotatedValue":
6482
"""
6583
The actual value was removed because the size of the field exceeded the configured maximum size,
6684
for example specified with the max_request_body_size sdk option.
6785
"""
86+
# This is the legacy approach - we want to transition over to `substituted_because_over_size_limit` after we completely transition
87+
# to span-first
6888
return AnnotatedValue(
6989
value=value,
7090
metadata={
@@ -77,6 +97,26 @@ def removed_because_over_size_limit(cls, value: "Any" = "") -> "AnnotatedValue":
7797
},
7898
)
7999

100+
@classmethod
101+
def substituted_because_over_size_limit(
102+
cls, value: "Any" = OVER_SIZE_LIMIT_SUBSTITUTE
103+
) -> "AnnotatedValue":
104+
"""
105+
The actual value was replaced because the size of the field exceeded the configured maximum size,
106+
for example specified with the max_request_body_size sdk option.
107+
"""
108+
return AnnotatedValue(
109+
value=value,
110+
metadata={
111+
"rem": [ # Remark
112+
[
113+
"!config", # Because of configured maximum size
114+
"s", # The field's original value was substituted
115+
]
116+
]
117+
},
118+
)
119+
80120
@classmethod
81121
def substituted_because_contains_sensitive_data(cls) -> "AnnotatedValue":
82122
"""The actual value was removed because it contained sensitive information."""
@@ -99,17 +139,10 @@ def substituted_because_contains_sensitive_data(cls) -> "AnnotatedValue":
99139

100140
if TYPE_CHECKING:
101141
from collections.abc import Container, MutableMapping, Sequence
102-
103142
from datetime import datetime
104-
105143
from types import TracebackType
106-
from typing import Any
107-
from typing import Callable
108-
from typing import Dict
109-
from typing import Mapping
110-
from typing import NotRequired
111-
from typing import Optional
112-
from typing import Type
144+
from typing import Any, Callable, Dict, Mapping, NotRequired, Optional, Type
145+
113146
from typing_extensions import Literal, TypedDict
114147

115148
class SDKInfo(TypedDict):

sentry_sdk/integrations/_asgi_common.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,9 @@ def _get_request_data(asgi_scope: "Any") -> "Dict[str, Any]":
9393
if ty in ("http", "websocket"):
9494
request_data["method"] = asgi_scope.get("method")
9595

96-
request_data["headers"] = headers = _filter_headers(_get_headers(asgi_scope))
96+
request_data["headers"] = headers = _filter_headers(
97+
_get_headers(asgi_scope),
98+
)
9799
request_data["query_string"] = _get_query(asgi_scope)
98100

99101
request_data["url"] = _get_url(

sentry_sdk/integrations/_wsgi_common.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -218,11 +218,11 @@ def _filter_headers(
218218
if should_send_default_pii():
219219
return headers
220220

221-
substitute: "Union[AnnotatedValue, str]"
222-
if use_annotated_value:
223-
substitute = AnnotatedValue.removed_because_over_size_limit()
224-
else:
225-
substitute = SENSITIVE_DATA_SUBSTITUTE
221+
substitute: "Union[AnnotatedValue, str]" = (
222+
SENSITIVE_DATA_SUBSTITUTE
223+
if not use_annotated_value
224+
else AnnotatedValue.removed_because_over_size_limit()
225+
)
226226

227227
return {
228228
k: (v if k.upper().replace("-", "_") not in SENSITIVE_HEADERS else substitute)

0 commit comments

Comments
 (0)