Skip to content

Commit abc3c3c

Browse files
committed
feat: Enhance Sentry scrubber with more flexible markers
The Sentry scrubber has been improved to support a wider range of marker types for identifying sections to be redacted. Now, in addition to exact value matches, it can handle lists, tuples and sets of values as markers. This allows for more granular control over what gets redacted. A new private method `_is_dict_should_be_scrubbed` has been added to encapsulate the logic for determining whether a dictionary should be scrubbed based on its key-value pairs. The README.md file has also been updated to reflect these changes and provide examples of how to use the new features. Tests have been added to ensure that the new functionality works as expected under various scenarios.
1 parent fedaadb commit abc3c3c

File tree

4 files changed

+67
-8
lines changed

4 files changed

+67
-8
lines changed

README.md

+9-2
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,10 @@ from sentry_scrubber.scrubber import SentryScrubber
133133

134134
# Define markers that indicate sections to be removed
135135
dict_markers = {
136-
'visibility': 'private'
136+
'visibility': 'private',
137+
'status': ['error', 'failure'], # List of values to match
138+
'level': ('warning', 'critical'), # Tuple of values to match
139+
'environment': {'staging', 'production'} # Set of values to match
137140
}
138141

139142
scrubber = SentryScrubber(dict_markers_to_scrub=dict_markers)
@@ -144,11 +147,15 @@ event = {
144147
'private_section': {
145148
'visibility': 'private', # This will cause the entire 'private_section' to be redacted
146149
'secret_data': 'sensitive information'
150+
},
151+
'error_section': {
152+
'status': 'error', # This will cause the entire 'error_section' to be redacted
153+
'details': 'Error details'
147154
}
148155
}
149156

150157
scrubbed = scrubber.scrub_event(event)
151-
# Result: {'public_info': 'This is public', 'private_section': '<redacted>'}
158+
# Result: {'public_info': 'This is public', 'private_section': '<redacted>', 'error_section': '<redacted>'}
152159
```
153160

154161
### Exclusions

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "sentry-scrubber"
3-
version = "2.1.0"
3+
version = "2.2.0"
44
description = "A lightweight and flexible Python library for scrubbing sensitive information from Sentry events before they are sent to the server."
55
authors = ["Andrei Andreev"]
66
readme = "README.md"

sentry_scrubber/scrubber.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -233,11 +233,9 @@ def scrub_entity_recursively(self, entity: Union[str, Dict, List, Any], sensitiv
233233
result[key] = value
234234
continue
235235

236-
if marker_value := self.dict_markers_to_scrub.get(key):
237-
should_be_scrubbed = value == marker_value
238-
if should_be_scrubbed:
239-
result = self.placeholder
240-
break
236+
if self._is_dict_should_be_scrubbed(key, value):
237+
result = self.placeholder
238+
break
241239

242240
if key in self.dict_keys_for_scrub:
243241
if isinstance(value, str):
@@ -255,3 +253,12 @@ def scrub_entity_recursively(self, entity: Union[str, Dict, List, Any], sensitiv
255253
return tuple(self.scrub_entity_recursively(item, sensitive_strings, depth) for item in entity)
256254

257255
return entity
256+
257+
def _is_dict_should_be_scrubbed(self, key: str, value: Any) -> bool:
    """Tell whether a dict entry marks its enclosing dict for redaction.

    The marker registered for ``key`` in ``self.dict_markers_to_scrub`` is
    compared with ``value``. The entry matches when ``value`` equals the
    marker, or when the marker is a list, tuple, set or frozenset that
    contains ``value``.

    Args:
        key: Key of the dict entry being inspected.
        value: Value stored under ``key``; may be of any type.

    Returns:
        ``True`` if the entry matches its marker, ``False`` otherwise
        (including when no marker, or a ``None`` marker, is registered).
    """
    marker_value = self.dict_markers_to_scrub.get(key)
    # Compare against None explicitly: a truthiness test would silently
    # ignore legitimate falsy markers such as False, 0 or "".
    if marker_value is None:
        return False

    if value == marker_value:
        return True

    if isinstance(marker_value, (list, tuple, set, frozenset)):
        try:
            return value in marker_value
        except TypeError:
            # Unhashable values (dicts, lists) cannot be looked up in a
            # set; they cannot be members, so this is not a match.
            return False

    return False

sentry_scrubber/tests/test_scrubber.py

+45
Original file line numberDiff line numberDiff line change
@@ -426,3 +426,48 @@ def test_scrub_list(scrubber):
426426
actual = scrubber.scrub_entity_recursively(['/home/username/some/'], sensitive_string)
427427
assert actual == ['/home/<redacted>/some/']
428428
assert 'username' in sensitive_string
429+
430+
431+
@pytest.mark.parametrize(
    "key, value, dict_markers_to_scrub, expected",
    [
        # No marker is registered for the key.
        ("unknown_key", "value", {}, False),
        # Scalar marker, value is equal to it.
        ("api_key", "secret123", {"api_key": "secret123"}, True),
        # Scalar marker, value differs from it.
        ("api_key", "different_value", {"api_key": "secret123"}, False),
        # List marker, value is one of its items.
        ("status", "error", {"status": ["error", "failure"]}, True),
        # List marker, value is not among its items.
        ("status", "success", {"status": ["error", "failure"]}, False),
        # Tuple marker, value is one of its items.
        ("level", "critical", {"level": ("warning", "critical")}, True),
        # Set marker, value is one of its members.
        ("environment", "production", {"environment": {"staging", "production"}}, True),
    ],
)
def test_is_dict_should_be_scrubbed(key, value, dict_markers_to_scrub, expected):
    """Check _is_dict_should_be_scrubbed against scalar, list, tuple and set markers."""
    configured = SentryScrubber(dict_markers_to_scrub=dict_markers_to_scrub)
    assert configured._is_dict_should_be_scrubbed(key, value) == expected
461+
462+
463+
def test_is_dict_should_be_scrubbed_with_empty_markers():
    """A scrubber constructed without markers must not flag any key/value pair."""
    markerless = SentryScrubber()
    flagged = markerless._is_dict_should_be_scrubbed("any_key", "any_value")
    assert not flagged
467+
468+
469+
def test_is_dict_should_be_scrubbed_with_none_value():
    """A ``None`` marker matches nothing, neither ``None`` nor any other value."""
    none_marked = SentryScrubber(dict_markers_to_scrub={"key": None})
    for candidate in (None, "not_none"):
        assert not none_marked._is_dict_should_be_scrubbed("key", candidate)

0 commit comments

Comments
 (0)