Skip to content

Commit 25f1fd3

Browse files
seer-by-sentry[bot], getsantry[bot], and yuvmen
authored
fix: Batch commit file change queries to avoid timeouts (#103170)
Fixes [SENTRY-4596](https://sentry.io/organizations/sentry/issues/6726431295/). The issue was that: Inefficient database query combining multiple `LIKE` conditions with a 200+ item `IN` clause causes PostgreSQL statement timeout, exhausting all task retries. - Introduces `COMMIT_BATCH_SIZE` to limit the number of commits processed in a single query. - Modifies `get_filepath_committers` to batch commit IDs when querying `CommitFileChange` objects, preventing potential query timeouts caused by large `IN` clauses combined with complex `LIKE` conditions. This fix was generated by Seer in Sentry, triggered by Yuval Mandelboum. 👁️ Run ID: 2555348 Co-authored-by: seer-by-sentry[bot] <157164994+seer-by-sentry[bot]@users.noreply.github.com> Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com> Co-authored-by: Yuval Mandelboum <[email protected]>
1 parent 408e1a9 commit 25f1fd3

File tree

1 file changed

+15
-4
lines changed

1 file changed

+15
-4
lines changed

src/sentry/utils/committers.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,12 @@
2424
from sentry.users.services.user.service import user_service
2525
from sentry.utils.event_frames import find_stack_frames, munged_filename_and_frames
2626
from sentry.utils.hashlib import hash_values
27+
from sentry.utils.iterators import chunked
2728

2829
PATH_SEPARATORS = frozenset(["/", "\\"])
30+
# Limit the number of commits to batch in a single query to avoid query timeouts
31+
# from large IN clauses combined with complex LIKE conditions
32+
COMMIT_BATCH_SIZE = 50
2933

3034

3135
def tokenize_path(path: str) -> Iterator[str]:
@@ -96,11 +100,18 @@ def _get_commit_file_changes(
96100
# build a single query to get all of the commit file that might match the first n frames
97101
path_query = reduce(operator.or_, (Q(filename__iendswith=path) for path in filenames))
98102

99-
commit_file_change_matches = CommitFileChange.objects.filter(
100-
path_query, commit_id__in=[c.id for c in commits]
101-
)
103+
# Batch commits to avoid query timeouts from large IN clauses
104+
# combined with complex LIKE conditions
105+
all_file_changes: list[CommitFileChange] = []
106+
commit_ids = [c.id for c in commits]
107+
108+
for batch_commit_ids in chunked(commit_ids, COMMIT_BATCH_SIZE):
109+
commit_file_change_matches = CommitFileChange.objects.filter(
110+
path_query, commit_id__in=batch_commit_ids
111+
)
112+
all_file_changes.extend(list(commit_file_change_matches))
102113

103-
return list(commit_file_change_matches)
114+
return all_file_changes
104115

105116

106117
def _match_commits_paths(

0 commit comments

Comments (0)