
Commit 76570be

yuvmen authored and andrewshie-sentry committed
fix(deletions): Fix scheduled project deletion timeout failure (#103187)
We are seeing deletions fail on grouphash deletion during `Project.delete()`, which suggests `delete_project_group_hashes` is not deleting all hashes. Refactored this deletion to catch orphan grouphashes that might be left behind and cause the timeout. It now reuses `delete_group_hashes`, adding a mode that deletes all group hashes of a project and renaming it to `delete_project_group_hashes` for simplicity.
1 parent 7a7eabe commit 76570be
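
In short, the refactor collapses the two-step hash deletion into a single entry point whose `group_ids_filter` argument selects the mode. A minimal sketch of the before/after call pattern, distilled from the diff below (`project_id` and the ID lists stand in for the real call-site values):

    # Before: hashes were only ever matched by collected group IDs, so
    # orphaned GroupHash rows (group_id=None) were never deleted and
    # could keep Project.delete() busy until it timed out.
    delete_group_hashes(project_id, error_group_ids, seer_deletion=True)
    delete_group_hashes(project_id, issue_platform_group_ids)

    # After: one renamed function; group deletion still passes explicit
    # filters, while project deletion (call site not shown in this diff)
    # relies on the default group_ids_filter=None to sweep every hash,
    # orphans included.
    delete_project_group_hashes(
        project_id, group_ids_filter=error_group_ids, seer_deletion=True
    )
    delete_project_group_hashes(project_id)  # group_ids_filter=None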

2 files changed (+103, -39 lines)


src/sentry/deletions/defaults/group.py

Lines changed: 37 additions & 38 deletions

@@ -32,7 +32,6 @@
 from sentry.snuba.dataset import Dataset
 from sentry.tasks.delete_seer_grouping_records import may_schedule_task_to_delete_hashes_from_seer
 from sentry.utils import metrics
-from sentry.utils.query import RangeQuerySetWrapper

 from ..base import BaseDeletionTask, BaseRelation, ModelDeletionTask, ModelRelation
 from ..manager import DeletionTaskManager
@@ -228,8 +227,12 @@ def _delete_children(self, instance_list: Sequence[Group]) -> None:
         # delete_children() will delete GroupHash rows and related GroupHashMetadata rows,
         # however, we have added multiple optimizations in this function that would need to
         # be ported to a custom deletion task.
-        delete_group_hashes(instance_list[0].project_id, error_group_ids, seer_deletion=True)
-        delete_group_hashes(instance_list[0].project_id, issue_platform_group_ids)
+        delete_project_group_hashes(
+            instance_list[0].project_id, group_ids_filter=error_group_ids, seer_deletion=True
+        )
+        delete_project_group_hashes(
+            instance_list[0].project_id, group_ids_filter=issue_platform_group_ids
+        )

         # If this isn't a retention cleanup also remove event data.
         if not os.environ.get("_SENTRY_CLEANUP"):
@@ -259,21 +262,6 @@ def mark_deletion_in_progress(self, instance_list: Sequence[Group]) -> None:
         ).update(status=GroupStatus.DELETION_IN_PROGRESS, substatus=None)


-def delete_project_group_hashes(project_id: int) -> None:
-    groups = []
-    for group in RangeQuerySetWrapper(
-        Group.objects.filter(project_id=project_id), step=GROUP_CHUNK_SIZE
-    ):
-        groups.append(group)
-
-    error_groups, issue_platform_groups = separate_by_group_category(groups)
-    error_group_ids = [group.id for group in error_groups]
-    delete_group_hashes(project_id, error_group_ids, seer_deletion=True)
-
-    issue_platform_group_ids = [group.id for group in issue_platform_groups]
-    delete_group_hashes(project_id, issue_platform_group_ids)
-
-
 def update_group_hash_metadata_in_batches(hash_ids: Sequence[int]) -> None:
     """
     Update seer_matched_grouphash to None for GroupHashMetadata rows
@@ -323,41 +311,52 @@ def update_group_hash_metadata_in_batches(hash_ids: Sequence[int]) -> None:
         metrics.incr("deletions.group_hash_metadata.max_iterations_reached", sample_rate=1.0)


-def delete_group_hashes(
+def delete_project_group_hashes(
     project_id: int,
-    group_ids: Sequence[int],
+    group_ids_filter: Sequence[int] | None = None,
     seer_deletion: bool = False,
 ) -> None:
-    # Validate batch size to ensure it's at least 1 to avoid ValueError in range()
+    """
+    Delete GroupHash records for a project.
+
+    This is the main function for deleting GroupHash records. It can delete all hashes for a project
+    (used during project deletion to clean up orphaned records) or delete hashes for specific groups
+    (used during group deletion).
+
+    Args:
+        project_id: The project to delete hashes for
+        group_ids_filter: Optional filter for specific group IDs.
+            - If None: deletes ALL GroupHash records for the project (including orphans)
+            - If empty: returns immediately (no-op for safety)
+            - If non-empty: deletes only hashes for those specific groups
+        seer_deletion: Whether to notify Seer about the deletion
+    """
+    # Safety: empty filter means nothing to delete
+    if group_ids_filter is not None and len(group_ids_filter) == 0:
+        return
+
     hashes_batch_size = max(1, options.get("deletions.group-hashes-batch-size"))

-    # Set a reasonable upper bound on iterations to prevent infinite loops.
-    # The loop will naturally terminate when no more hashes are found.
     iterations = 0
     while iterations < GROUP_HASH_ITERATIONS:
-        qs = GroupHash.objects.filter(project_id=project_id, group_id__in=group_ids).values_list(
-            "id", "hash"
-        )[:hashes_batch_size]
-        hashes_chunk = list(qs)
+        # Base query: all hashes for this project
+        qs = GroupHash.objects.filter(project_id=project_id)
+
+        # Apply group filter if provided
+        if group_ids_filter is not None:
+            qs = qs.filter(group_id__in=group_ids_filter)
+
+        hashes_chunk = list(qs.values_list("id", "hash")[:hashes_batch_size])
         if not hashes_chunk:
             break
         try:
             if seer_deletion:
-                # Tell seer to delete grouping records for these groups
-                # It's low priority to delete the hashes from seer, so we don't want
-                # any network errors to block the deletion of the groups
                 hash_values = [gh[1] for gh in hashes_chunk]
                 may_schedule_task_to_delete_hashes_from_seer(project_id, hash_values)
         except Exception:
             logger.warning("Error scheduling task to delete hashes from seer")
         finally:
             hash_ids = [gh[0] for gh in hashes_chunk]
-            # GroupHashMetadata rows can reference GroupHash rows via seer_matched_grouphash_id.
-            # Before deleting these GroupHash rows, we need to either:
-            # 1. Update seer_matched_grouphash to None first (to avoid foreign key constraint errors), OR
-            # 2. Delete the GroupHashMetadata rows entirely (they'll be deleted anyway)
-            # If we update the columns first, the deletion of the grouphash metadata rows will have less work to do,
-            # thus, improving the performance of the deletion.
             update_group_hash_metadata_in_batches(hash_ids)
             GroupHashMetadata.objects.filter(grouphash_id__in=hash_ids).delete()
             GroupHash.objects.filter(id__in=hash_ids).delete()
@@ -367,8 +366,8 @@ def delete_group_hashes(
     if iterations == GROUP_HASH_ITERATIONS:
         metrics.incr("deletions.group_hashes.max_iterations_reached", sample_rate=1.0)
         logger.warning(
-            "Group hashes batch deletion reached the maximum number of iterations. "
-            "Investigate if we need to change the GROUP_HASH_ITERATIONS value."
+            "delete_group_hashes.max_iterations_reached",
+            extra={"project_id": project_id, "filtered": group_ids_filter is not None},
         )

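For reference, the three `group_ids_filter` modes documented in the new docstring look like this in use (a minimal sketch; `project_id` and the group IDs are placeholders):

    # None (default): delete ALL GroupHash records for the project,
    # including orphaned rows whose group_id is NULL.
    delete_project_group_hashes(project_id, group_ids_filter=None)

    # Empty sequence: returns immediately, a deliberate no-op for safety.
    delete_project_group_hashes(project_id, group_ids_filter=[])

    # Non-empty sequence: delete only the hashes of these groups and,
    # with seer_deletion=True, schedule Seer cleanup for the hash values.
    delete_project_group_hashes(
        project_id, group_ids_filter=[group_id_1, group_id_2], seer_deletion=True
    )
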
tests/sentry/deletions/test_group.py

Lines changed: 66 additions & 1 deletion

@@ -9,7 +9,10 @@
 from snuba_sdk import Column, Condition, DeleteQuery, Entity, Function, Op, Query, Request

 from sentry import deletions, nodestore
-from sentry.deletions.defaults.group import update_group_hash_metadata_in_batches
+from sentry.deletions.defaults.group import (
+    delete_project_group_hashes,
+    update_group_hash_metadata_in_batches,
+)
 from sentry.deletions.tasks.groups import delete_groups_for_project
 from sentry.issues.grouptype import FeedbackGroup, GroupCategory
 from sentry.issues.issue_occurrence import IssueOccurrence
@@ -389,6 +392,68 @@ def test_update_group_hash_metadata_in_batches(self) -> None:
         else:
             raise AssertionError("GroupHashMetadata is None for grouphash id=%s" % grouphash.id)

+    def test_delete_project_group_hashes_specific_groups(self) -> None:
+        """Test deleting grouphashes for specific group IDs (including metadata) and empty list safety."""
+        self.project.update_option("sentry:similarity_backfill_completed", int(time()))
+
+        event_1 = self.store_event(
+            data={"platform": "python", "stacktrace": {"frames": [{"filename": "error_1.py"}]}},
+            project_id=self.project.id,
+        )
+        event_2 = self.store_event(
+            data={"platform": "python", "stacktrace": {"frames": [{"filename": "error_2.py"}]}},
+            project_id=self.project.id,
+        )
+
+        grouphash_1 = GroupHash.objects.get(group=event_1.group)
+        grouphash_2 = GroupHash.objects.get(group=event_2.group)
+        assert grouphash_1.metadata is not None
+        assert grouphash_2.metadata is not None
+        metadata_1_id = grouphash_1.metadata.id
+        metadata_2_id = grouphash_2.metadata.id
+
+        assert GroupHash.objects.filter(project=self.project).count() == 2
+
+        delete_project_group_hashes(
+            project_id=self.project.id,
+            group_ids_filter=[event_1.group.id],
+        )
+
+        assert not GroupHash.objects.filter(id=grouphash_1.id).exists()
+        assert not GroupHashMetadata.objects.filter(id=metadata_1_id).exists()
+        assert GroupHash.objects.filter(id=grouphash_2.id).exists()
+        assert GroupHashMetadata.objects.filter(id=metadata_2_id).exists()
+
+        # Empty list should be a no-op
+        delete_project_group_hashes(project_id=self.project.id, group_ids_filter=[])
+        assert GroupHash.objects.filter(id=grouphash_2.id).exists()
+
+    def test_delete_project_group_hashes_all_including_orphans(self) -> None:
+        """Test deleting all grouphashes including orphans when group_ids_filter=None."""
+        self.project.update_option("sentry:similarity_backfill_completed", int(time()))
+
+        event = self.store_event(
+            data={"platform": "python", "stacktrace": {"frames": [{"filename": "error.py"}]}},
+            project_id=self.project.id,
+        )
+        grouphash = GroupHash.objects.get(group=event.group)
+        assert grouphash.metadata is not None
+        metadata_id = grouphash.metadata.id
+
+        orphan_1 = GroupHash.objects.create(project=self.project, hash="a" * 32, group=None)
+        orphan_2 = GroupHash.objects.create(project=self.project, hash="b" * 32, group=None)
+
+        assert GroupHash.objects.filter(project=self.project).count() == 3
+        assert GroupHash.objects.filter(project=self.project, group__isnull=True).count() == 2
+
+        delete_project_group_hashes(project_id=self.project.id, group_ids_filter=None)
+
+        assert not GroupHash.objects.filter(id=grouphash.id).exists()
+        assert not GroupHash.objects.filter(id=orphan_1.id).exists()
+        assert not GroupHash.objects.filter(id=orphan_2.id).exists()
+        assert not GroupHashMetadata.objects.filter(id=metadata_id).exists()
+        assert GroupHash.objects.filter(project=self.project).count() == 0
+

 class DeleteIssuePlatformTest(TestCase, SnubaTestCase, OccurrenceTestMixin):
     referrer = Referrer.TESTING_TEST.value
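
As a usage note, the new tests should be runnable through the repo's standard pytest setup; the command shape below is assumed, not taken from the commit:

    pytest tests/sentry/deletions/test_group.py -k delete_project_group_hashes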

0 commit comments
