Skip to content

Commit 25f1fd3

Browse files
seer-by-sentry[bot], getsantry[bot], and yuvmen
authored
fix: Batch commit file change queries to avoid timeouts (#103170)
Fixes [SENTRY-4596](https://sentry.io/organizations/sentry/issues/6726431295/). The issue was that: Inefficient database query combining multiple `LIKE` conditions with a 200+ item `IN` clause causes PostgreSQL statement timeout, exhausting all task retries. - Introduces `COMMIT_BATCH_SIZE` to limit the number of commits processed in a single query. - Modifies `get_filepath_committers` to batch commit IDs when querying `CommitFileChange` objects, preventing potential query timeouts caused by large `IN` clauses combined with complex `LIKE` conditions. This fix was generated by Seer in Sentry, triggered by Yuval Mandelboum. 👁️ Run ID: 2555348 Co-authored-by: seer-by-sentry[bot] <157164994+seer-by-sentry[bot]@users.noreply.github.com> Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com> Co-authored-by: Yuval Mandelboum <[email protected]>
1 parent 408e1a9 commit 25f1fd3

File tree

1 file changed

+15
-4
lines changed

1 file changed

+15
-4
lines changed

src/sentry/utils/committers.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,12 @@
2424
from sentry.users.services.user.service import user_service
2525
from sentry.utils.event_frames import find_stack_frames, munged_filename_and_frames
2626
from sentry.utils.hashlib import hash_values
27+
from sentry.utils.iterators import chunked
2728

2829
PATH_SEPARATORS = frozenset(["/", "\\"])
30+
# Limit the number of commits to batch in a single query to avoid query timeouts
31+
# from large IN clauses combined with complex LIKE conditions
32+
COMMIT_BATCH_SIZE = 50
2933

3034

3135
def tokenize_path(path: str) -> Iterator[str]:
@@ -96,11 +100,18 @@ def _get_commit_file_changes(
96100
# build a single query to get all of the commit file that might match the first n frames
97101
path_query = reduce(operator.or_, (Q(filename__iendswith=path) for path in filenames))
98102

99-
commit_file_change_matches = CommitFileChange.objects.filter(
100-
path_query, commit_id__in=[c.id for c in commits]
101-
)
103+
# Batch commits to avoid query timeouts from large IN clauses
104+
# combined with complex LIKE conditions
105+
all_file_changes: list[CommitFileChange] = []
106+
commit_ids = [c.id for c in commits]
107+
108+
for batch_commit_ids in chunked(commit_ids, COMMIT_BATCH_SIZE):
109+
commit_file_change_matches = CommitFileChange.objects.filter(
110+
path_query, commit_id__in=batch_commit_ids
111+
)
112+
all_file_changes.extend(list(commit_file_change_matches))
102113

103-
return list(commit_file_change_matches)
114+
return all_file_changes
104115

105116

106117
def _match_commits_paths(

0 commit comments

Comments (0)