 from sentry.snuba.dataset import Dataset
 from sentry.tasks.delete_seer_grouping_records import may_schedule_task_to_delete_hashes_from_seer
 from sentry.utils import metrics
-from sentry.utils.query import RangeQuerySetWrapper

 from ..base import BaseDeletionTask, BaseRelation, ModelDeletionTask, ModelRelation
 from ..manager import DeletionTaskManager
@@ -228,8 +227,12 @@ def _delete_children(self, instance_list: Sequence[Group]) -> None:
         # delete_children() will delete GroupHash rows and related GroupHashMetadata rows,
         # however, we have added multiple optimizations in this function that would need to
         # be ported to a custom deletion task.
-        delete_group_hashes(instance_list[0].project_id, error_group_ids, seer_deletion=True)
-        delete_group_hashes(instance_list[0].project_id, issue_platform_group_ids)
+        delete_project_group_hashes(
+            instance_list[0].project_id, group_ids_filter=error_group_ids, seer_deletion=True
+        )
+        delete_project_group_hashes(
+            instance_list[0].project_id, group_ids_filter=issue_platform_group_ids
+        )

         # If this isn't a retention cleanup also remove event data.
         if not os.environ.get("_SENTRY_CLEANUP"):
@@ -259,21 +262,6 @@ def mark_deletion_in_progress(self, instance_list: Sequence[Group]) -> None:
         ).update(status=GroupStatus.DELETION_IN_PROGRESS, substatus=None)


-def delete_project_group_hashes(project_id: int) -> None:
-    groups = []
-    for group in RangeQuerySetWrapper(
-        Group.objects.filter(project_id=project_id), step=GROUP_CHUNK_SIZE
-    ):
-        groups.append(group)
-
-    error_groups, issue_platform_groups = separate_by_group_category(groups)
-    error_group_ids = [group.id for group in error_groups]
-    delete_group_hashes(project_id, error_group_ids, seer_deletion=True)
-
-    issue_platform_group_ids = [group.id for group in issue_platform_groups]
-    delete_group_hashes(project_id, issue_platform_group_ids)
-
-
 def update_group_hash_metadata_in_batches(hash_ids: Sequence[int]) -> None:
     """
     Update seer_matched_grouphash to None for GroupHashMetadata rows
@@ -323,41 +311,52 @@ def update_group_hash_metadata_in_batches(hash_ids: Sequence[int]) -> None:
         metrics.incr("deletions.group_hash_metadata.max_iterations_reached", sample_rate=1.0)


-def delete_group_hashes(
+def delete_project_group_hashes(
     project_id: int,
-    group_ids: Sequence[int],
+    group_ids_filter: Sequence[int] | None = None,
     seer_deletion: bool = False,
 ) -> None:
-    # Validate batch size to ensure it's at least 1 to avoid ValueError in range()
+    """
+    Delete GroupHash records for a project.
+
+    This is the main function for deleting GroupHash records. It can delete all hashes for a project
+    (used during project deletion to clean up orphaned records) or delete hashes for specific groups
+    (used during group deletion).
+
+    Args:
+        project_id: The project to delete hashes for
+        group_ids_filter: Optional filter for specific group IDs.
+            - If None: deletes ALL GroupHash records for the project (including orphans)
+            - If empty: returns immediately (no-op for safety)
+            - If non-empty: deletes only hashes for those specific groups
+        seer_deletion: Whether to notify Seer about the deletion
+    """
+    # Safety: empty filter means nothing to delete
+    if group_ids_filter is not None and len(group_ids_filter) == 0:
+        return
+
     hashes_batch_size = max(1, options.get("deletions.group-hashes-batch-size"))

-    # Set a reasonable upper bound on iterations to prevent infinite loops.
-    # The loop will naturally terminate when no more hashes are found.
     iterations = 0
     while iterations < GROUP_HASH_ITERATIONS:
-        qs = GroupHash.objects.filter(project_id=project_id, group_id__in=group_ids).values_list(
-            "id", "hash"
-        )[:hashes_batch_size]
-        hashes_chunk = list(qs)
+        # Base query: all hashes for this project
+        qs = GroupHash.objects.filter(project_id=project_id)
+
+        # Apply group filter if provided
+        if group_ids_filter is not None:
+            qs = qs.filter(group_id__in=group_ids_filter)
+
+        hashes_chunk = list(qs.values_list("id", "hash")[:hashes_batch_size])
         if not hashes_chunk:
             break
         try:
             if seer_deletion:
-                # Tell seer to delete grouping records for these groups
-                # It's low priority to delete the hashes from seer, so we don't want
-                # any network errors to block the deletion of the groups
                 hash_values = [gh[1] for gh in hashes_chunk]
                 may_schedule_task_to_delete_hashes_from_seer(project_id, hash_values)
         except Exception:
             logger.warning("Error scheduling task to delete hashes from seer")
         finally:
             hash_ids = [gh[0] for gh in hashes_chunk]
-            # GroupHashMetadata rows can reference GroupHash rows via seer_matched_grouphash_id.
-            # Before deleting these GroupHash rows, we need to either:
-            # 1. Update seer_matched_grouphash to None first (to avoid foreign key constraint errors), OR
-            # 2. Delete the GroupHashMetadata rows entirely (they'll be deleted anyway)
-            # If we update the columns first, the deletion of the grouphash metadata rows will have less work to do,
-            # thus, improving the performance of the deletion.
             update_group_hash_metadata_in_batches(hash_ids)
             GroupHashMetadata.objects.filter(grouphash_id__in=hash_ids).delete()
             GroupHash.objects.filter(id__in=hash_ids).delete()
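
The docstring above spells out three calling modes for the renamed function. A minimal usage sketch of the new signature follows; the project and group IDs are hypothetical, purely for illustration:

    # Hypothetical IDs, for illustration only.
    project_id = 42
    error_group_ids = [1001, 1002]

    # Project deletion: no filter, so every GroupHash row for the project is
    # removed, including orphaned rows.
    delete_project_group_hashes(project_id)

    # Group deletion: removes only the hashes for the given groups and
    # schedules the Seer cleanup task for their hash values.
    delete_project_group_hashes(
        project_id, group_ids_filter=error_group_ids, seer_deletion=True
    )

    # Empty filter: returns immediately instead of falling through to the
    # unfiltered delete-everything path.
    delete_project_group_hashes(project_id, group_ids_filter=[])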
@@ -367,8 +366,8 @@ def delete_group_hashes(
     if iterations == GROUP_HASH_ITERATIONS:
         metrics.incr("deletions.group_hashes.max_iterations_reached", sample_rate=1.0)
         logger.warning(
-            "Group hashes batch deletion reached the maximum number of iterations. "
-            "Investigate if we need to change the GROUP_HASH_ITERATIONS value."
+            "delete_group_hashes.max_iterations_reached",
+            extra={"project_id": project_id, "filtered": group_ids_filter is not None},
         )

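The while loop implements a bounded batched delete: fetch up to one batch of rows, delete them, and stop either when no rows remain or when the hard iteration cap trips (surfaced through the metric and structured log above). A self-contained sketch of the same pattern, with a plain list standing in for the Django queryset; every name here is illustrative rather than taken from the codebase:

    MAX_ITERATIONS = 10_000  # stands in for GROUP_HASH_ITERATIONS
    BATCH_SIZE = 100         # stands in for the deletions.group-hashes-batch-size option

    def delete_in_batches(rows: list[int]) -> int:
        """Delete rows in fixed-size chunks; returns the number of iterations used."""
        iterations = 0
        while iterations < MAX_ITERATIONS:
            chunk = rows[:BATCH_SIZE]  # stands in for qs[:hashes_batch_size]
            if not chunk:
                break  # nothing left: the loop terminates naturally
            del rows[:BATCH_SIZE]  # stands in for .delete()
            iterations += 1
        if iterations == MAX_ITERATIONS:
            # mirrors the max_iterations_reached metric/log escape hatch above
            print("max iterations reached; some rows may remain")
        return iterations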