Skip to content

Commit e886904

Browse files
authored
fix(ci_visibility): git multi-process issues (#18156)
## Description Fixes two sources of spurious git warning logs that customers see when using the pytest plugin under pytest-xdist (or other multi-process) environments. **Problem 1: shallow lock contention** Multiple xdist workers each call `git fetch --update-shallow` during startup. They all compete for `.git/shallow.lock`, causing repeated `fatal: Unable to create '.../.git/shallow.lock': File exists` warnings. Fixed by: - Adding `_call_git_with_lock_retry()` in `git.py`, which retries up to 5 times with exponential back-off + jitter when lock contention is detected. - Forcing `LC_ALL=C` / `LANG=C` in `_call_git()` so git's error messages are always in English, making the lock-contention regex reliable regardless of the user's locale. - Short-circuiting `unshallow_repository()` at entry when the repo is already non-shallow (a sibling worker may have finished unshallowing while we were waiting on the lock), and aborting the retry loop early under the same condition. **Problem 2: merge-base called on a shallow repo** `git merge-base` was called inside `get_env_tags()`, which runs at plugin startup before any unshallowing. On shallow clones the commits aren't locally available yet, so the call always fails silently. Fixed by: - Removing the call from `get_env_tags()` and deferring it to the new `SessionManager._update_pr_merge_base()` helper, which is called from `upload_git_data()` after unshallowing has completed. - Moving the "all commits already in backend" early-return in `upload_git_data()` to run *after* `_update_pr_merge_base()`, so `PULL_REQUEST_BASE_BRANCH_SHA` is always populated even when no pack upload is needed. - The standalone helper `get_pr_base_commit_sha()` that was the only caller is removed as dead code. ## Testing New tests added: - `TestGitLockRetry` — unit tests for `_call_git_with_lock_retry`: success path, non-lock errors skip retry, lock error retries then succeeds, retry exhaustion returns last failure, `unshallow_repository` uses the retry helper, `unshallow_repository` skips the fetch when the repo is already non-shallow, `unshallow_repository` aborts retry when a sibling unshallows concurrently, `should_retry` callback returning False aborts the loop, and `_call_git` forces `LC_ALL=C`/`LANG=C`. - `TestUpdatePrMergeBase` — unit tests for `SessionManager._update_pr_merge_base`: skips when SHA already set, skips when inputs are missing, sets the tag when both SHAs are present, skips when merge-base returns empty, and three wiring tests verifying it is called from `upload_git_data()` in shallow, non-shallow, and "all commits already known" scenarios. - Updated `TestGitUnshallow` assertions to reflect that `_call_git_with_lock_retry` now calls `_call_git(args, input_string)` with two positional arguments (the old `[([cmd], _)]` destructuring assumed a single positional arg) and to patch `is_shallow_repository` for the new entry guard. No regression tests (real shallow clone + full plugin run) were added; that would require a more involved fixture setup. ## Risks Low. The changes are confined to the pytest plugin internals and only affect CI Visibility data collection, not any tracer hot paths. - The retry logic only activates when a specific lock-error string is matched; all other errors fall through immediately as before. - The C locale override only affects the subprocess environment for git commands; it does not change the locale for the rest of the process. - `PULL_REQUEST_BASE_BRANCH_SHA` was already an optional tag — if `merge-base` fails it is simply absent, same as before. - Removing `get_pr_base_commit_sha` is safe; grep confirms it had no callers outside its own definition. Co-authored-by: federico.mon <federico.mon@datadoghq.com>
1 parent b26f246 commit e886904

9 files changed

Lines changed: 492 additions & 117 deletions

File tree

ddtrace/testing/internal/env_tags.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from ddtrace.testing.internal.ci import CITag
1010
from ddtrace.testing.internal.constants import DD_TEST_OPTIMIZATION_ENV_DATA_FILE
1111
from ddtrace.testing.internal.git import GitTag
12-
from ddtrace.testing.internal.git import get_pr_base_commit_sha
1312
from ddtrace.testing.internal.git import get_workspace_path
1413
from ddtrace.testing.internal.offline_mode import get_offline_mode
1514
from ddtrace.testing.internal.offline_mode import resolve_rlocation
@@ -127,17 +126,6 @@ def get_env_tags() -> dict[str, str]:
127126
if head_sha := tags.get(GitTag.COMMIT_HEAD_SHA):
128127
merge_tags(tags, git.get_git_head_tags_from_git_command(head_sha))
129128

130-
# For GitHub Actions: pull_request.base.sha is the HEAD of the base branch, not the merge base.
131-
# Compute the true base commit SHA via `git merge-base` when it hasn't already been set by the
132-
# CI provider (e.g. GitLab sets CI_MERGE_REQUEST_DIFF_BASE_SHA directly).
133-
if not tags.get(GitTag.PULL_REQUEST_BASE_BRANCH_SHA):
134-
base_branch_head_sha = tags.get(GitTag.PULL_REQUEST_BASE_BRANCH_HEAD_SHA)
135-
pr_head_sha = tags.get(GitTag.COMMIT_HEAD_SHA)
136-
if base_branch_head_sha and pr_head_sha:
137-
merge_base_sha = get_pr_base_commit_sha(base_branch_head_sha, pr_head_sha)
138-
if merge_base_sha:
139-
tags[GitTag.PULL_REQUEST_BASE_BRANCH_SHA] = merge_base_sha
140-
141129
normalize_git_tags(tags)
142130

143131
if workspace_path := tags.get(CITag.WORKSPACE_PATH):

ddtrace/testing/internal/git.py

Lines changed: 69 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
from dataclasses import dataclass
22
import logging
3+
import os
34
from pathlib import Path
45
import random
56
import re
67
import shutil
78
import subprocess # nosec: B404
89
import tempfile
10+
import time
911
import typing as t
1012

1113
from ddtrace.testing.internal.telemetry import GitTelemetry
@@ -15,6 +17,12 @@
1517

1618
log = logging.getLogger(__name__)
1719

20+
# Matches git lock-contention errors, e.g.:
21+
# fatal: Unable to create '/path/.git/shallow.lock': File exists.
22+
_LOCK_ERROR_RE = re.compile(r"Unable to create .+\.lock.+File exists", re.DOTALL)
23+
_LOCK_MAX_RETRIES = 5
24+
_LOCK_BASE_DELAY_SECONDS = 0.5
25+
1826

1927
class GitTag:
2028
# Git Repository URL
@@ -120,6 +128,10 @@ def _call_git(self, args: list[str], input_string: t.Optional[str] = None) -> _G
120128
git_cmd = [self.git_command, *args]
121129
log.debug("Running git command: %r", git_cmd)
122130

131+
# Force C locale so git's error messages are predictable; the lock-contention
132+
# regex in _call_git_with_lock_retry matches the English "File exists" string.
133+
git_env = {**os.environ, "LC_ALL": "C", "LANG": "C"}
134+
123135
with StopWatch() as sw:
124136
process = subprocess.Popen( # nosec: B603
125137
git_cmd,
@@ -129,6 +141,7 @@ def _call_git(self, args: list[str], input_string: t.Optional[str] = None) -> _G
129141
cwd=self.cwd,
130142
encoding="utf-8",
131143
errors="surrogateescape",
144+
env=git_env,
132145
)
133146
stdout, stderr = process.communicate(input=input_string)
134147

@@ -150,6 +163,45 @@ def _git_output(self, args: list[str], telemetry_type: t.Optional[GitTelemetry]
150163
return ""
151164
return result.stdout
152165

166+
def _call_git_with_lock_retry(
167+
self,
168+
args: list[str],
169+
input_string: t.Optional[str] = None,
170+
should_retry: t.Optional[t.Callable[[], bool]] = None,
171+
) -> _GitSubprocessDetails:
172+
"""Call git with automatic retry on lock-file contention errors.
173+
174+
In multi-process test environments several workers may issue git
175+
commands simultaneously. Commands that write a lock file (e.g.
176+
``git fetch --update-shallow``) fail with "File exists" when another
177+
process holds the lock. This helper retries such transient failures
178+
with exponential back-off so callers never need to handle the retry
179+
logic themselves.
180+
181+
``should_retry`` is called after each back-off sleep; returning False
182+
aborts the retry loop so callers can stop once another process has
183+
completed the same work (e.g. an xdist sibling that finished
184+
unshallowing while we were waiting on the lock).
185+
"""
186+
result = self._call_git(args, input_string)
187+
for attempt in range(_LOCK_MAX_RETRIES):
188+
if result.return_code == 0 or not _LOCK_ERROR_RE.search(result.stderr):
189+
break
190+
delay = _LOCK_BASE_DELAY_SECONDS * (2**attempt) + random.uniform(0, 1) # nosec: B311
191+
log.debug(
192+
"Git lock contention (attempt %d/%d), retrying in %.1fs: %s",
193+
attempt + 1,
194+
_LOCK_MAX_RETRIES,
195+
delay,
196+
result.stderr,
197+
)
198+
time.sleep(delay)
199+
if should_retry is not None and not should_retry():
200+
log.debug("Git lock retry aborted: work no longer needed")
201+
break
202+
result = self._call_git(args, input_string)
203+
return result
204+
153205
def get_git_version(self) -> tuple[int, ...]:
154206
output = self._git_output(["--version"]) # "git version 1.2.3"
155207
try:
@@ -232,6 +284,12 @@ def is_shallow_repository(self) -> bool:
232284
return output == "true"
233285

234286
def unshallow_repository(self, refspec: t.Optional[str] = None, parent_only: bool = False) -> _GitSubprocessDetails:
287+
# If another process (e.g. a parallel xdist worker) has already unshallowed the
288+
# repository, skip the redundant fetch.
289+
if not self.is_shallow_repository():
290+
log.debug("Repository is no longer shallow, skipping unshallow")
291+
return _GitSubprocessDetails(stdout="", stderr="", return_code=0, elapsed_seconds=0.0)
292+
235293
remote_name = self.get_remote_name()
236294
command = [
237295
"fetch",
@@ -245,8 +303,13 @@ def unshallow_repository(self, refspec: t.Optional[str] = None, parent_only: boo
245303
if refspec:
246304
command.append(refspec)
247305

248-
result = self._call_git(command)
306+
result = self._call_git_with_lock_retry(command, should_retry=self.is_shallow_repository)
249307
if result.return_code != 0:
308+
# The retry loop may have aborted because a sibling worker finished
309+
# unshallowing in the meantime; in that case treat the operation as a success.
310+
if not self.is_shallow_repository():
311+
log.debug("Repository was unshallowed by another process during retry")
312+
return _GitSubprocessDetails(stdout="", stderr="", return_code=0, elapsed_seconds=0.0)
250313
log.warning("Error unshallowing repo for refspec %s: %s", refspec, result.stderr)
251314
return result
252315

@@ -294,7 +357,11 @@ def try_all_unshallow_repository_methods(self) -> bool:
294357

295358
def get_merge_base(self, sha1: str, sha2: str) -> str:
296359
"""Return the best common ancestor commit SHA of sha1 and sha2."""
297-
return self._git_output(["merge-base", sha1, sha2])
360+
result = self._call_git(["merge-base", sha1, sha2])
361+
if result.return_code != 0:
362+
log.warning("Error getting merge-base for %s and %s: %s", sha1, sha2, result.stderr)
363+
return ""
364+
return result.stdout
298365

299366
def pack_objects(self, revisions: list[str]) -> t.Iterable[Path]:
300367
base_name = str(random.randint(1, 1000000)) # nosec: B311
@@ -324,21 +391,6 @@ def pack_objects(self, revisions: list[str]) -> t.Iterable[Path]:
324391
yield packfile
325392

326393

327-
def get_pr_base_commit_sha(base_branch_head_sha: str, head_sha: str) -> t.Optional[str]:
328-
"""Compute the true PR base commit SHA as the merge base of base_branch_head_sha and head_sha.
329-
330-
On GitHub Actions, pull_request.base.sha is the HEAD of the base branch, not the actual merge
331-
base commit. This function uses `git merge-base` to find the common ancestor.
332-
"""
333-
try:
334-
git = Git()
335-
except RuntimeError as e:
336-
log.warning("Error getting git data for merge-base computation: %s", e)
337-
return None
338-
339-
return git.get_merge_base(base_branch_head_sha, head_sha) or None
340-
341-
342394
def get_git_tags_from_git_command() -> dict[str, t.Optional[str]]:
343395
try:
344396
git = Git()

ddtrace/testing/internal/session_manager.py

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,20 @@ def _get_test_session_name(self) -> str:
411411

412412
return self.session.test_command
413413

414+
def _update_pr_merge_base(self, git: Git) -> None:
415+
"""Compute PULL_REQUEST_BASE_BRANCH_SHA via git merge-base if not already set.
416+
417+
Called from upload_git_data() after any unshallowing so the commits are
418+
guaranteed to be present in the local history.
419+
"""
420+
if self.env_tags.get(GitTag.PULL_REQUEST_BASE_BRANCH_SHA):
421+
return
422+
base_sha = self.env_tags.get(GitTag.PULL_REQUEST_BASE_BRANCH_HEAD_SHA)
423+
head_sha = self.env_tags.get(GitTag.COMMIT_HEAD_SHA)
424+
if base_sha and head_sha:
425+
if merge_base := git.get_merge_base(base_sha, head_sha):
426+
self.env_tags[GitTag.PULL_REQUEST_BASE_BRANCH_SHA] = merge_base
427+
414428
def upload_git_data(self) -> None:
415429
# NOTE: In manifest mode (Bazel sandbox), git commands are unavailable
416430
# and the external tool has already uploaded git pack data before the sandbox
@@ -438,26 +452,34 @@ def upload_git_data(self) -> None:
438452

439453
commits_not_in_backend = list(set(latest_commits) - set(backend_commits))
440454

441-
if len(commits_not_in_backend) == 0:
442-
log.debug("All latest commits found in backend, skipping metadata upload")
443-
return
444-
445455
if git.is_shallow_repository() and git.get_git_version() >= (2, 27, 0):
446456
log.debug("Shallow repository detected on git > 2.27 detected, unshallowing")
447457
unshallow_successful = git.try_all_unshallow_repository_methods()
448458
if unshallow_successful:
449-
log.debug("Unshallow successful, getting latest commits from backend based on unshallowed commits")
450-
latest_commits = git.get_latest_commits()
451-
backend_commits = self.api_client.get_known_commits(latest_commits)
452-
if backend_commits is None:
453-
log.warning("search_commits failed after unshallow, aborting git metadata upload")
454-
TelemetryAPI.get().record_git_pack_data(0, 0)
455-
return
456-
457-
commits_not_in_backend = list(set(latest_commits) - set(backend_commits))
458-
else:
459+
if len(commits_not_in_backend) > 0:
460+
log.debug("Unshallow successful, getting latest commits from backend based on unshallowed commits")
461+
latest_commits = git.get_latest_commits()
462+
backend_commits = self.api_client.get_known_commits(latest_commits)
463+
if backend_commits is None:
464+
log.warning("search_commits failed after unshallow, aborting git metadata upload")
465+
TelemetryAPI.get().record_git_pack_data(0, 0)
466+
return
467+
468+
commits_not_in_backend = list(set(latest_commits) - set(backend_commits))
469+
elif len(commits_not_in_backend) > 0:
459470
log.warning("Failed to unshallow repository, continuing to send pack data")
460471

472+
# Compute the true PR merge-base now that the repo has been unshallowed (if needed).
473+
# This is done here rather than at env_tags collection time because on shallow clones the
474+
# required commits are only available after the fetch above completes.
475+
# Must run before the early-return below so sessions where all commits are already
476+
# known to the backend still populate git.pull_request.base_branch_sha.
477+
self._update_pr_merge_base(git)
478+
479+
if len(commits_not_in_backend) == 0:
480+
log.debug("All latest commits found in backend, skipping metadata upload")
481+
return
482+
461483
revisions_to_send = git.get_filtered_revisions(
462484
excluded_commits=backend_commits, included_commits=commits_not_in_backend
463485
)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
fixes:
3+
- |
4+
CI Visibility: Fixes spurious ``git`` warnings when running tests with pytest-xdist.
5+
Simultaneous ``git fetch --update-shallow`` calls now retry with exponential back-off
6+
on ``.git/shallow.lock`` contention, skip the fetch entirely once a sibling worker has
7+
already unshallowed the repository, and run under the ``C`` locale so the contention
8+
check is robust against non-English git messages. ``git merge-base`` is deferred until
9+
after unshallowing so the required commits are available locally.

tests/testing/internal/test_bazel_offline_session_manager.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
import os
66
from pathlib import Path
7-
from unittest.mock import Mock
87
from unittest.mock import patch
98

109
# Path is also used in test_env_tags_non_empty_in_online_mode for get_workspace_path mock
@@ -16,6 +15,7 @@
1615
from ddtrace.testing.internal.session_manager import SessionManager
1716
from ddtrace.testing.internal.test_data import TestSession
1817
from tests.testing.mocks import MockDefaults
18+
from tests.testing.mocks import get_mock_git_instance
1919
from tests.testing.mocks import mock_api_client_settings
2020

2121

@@ -163,10 +163,7 @@ def test_git_upload_proceeds_in_online_mode(self, monkeypatch, tmp_path):
163163

164164
sm = self._build_sm_with_mocked_api(monkeypatch, env)
165165

166-
mock_git_instance = Mock()
167-
mock_git_instance.get_latest_commits.return_value = []
168-
mock_git_instance.get_filtered_revisions.return_value = []
169-
mock_git_instance.pack_objects.return_value = iter([])
166+
mock_git_instance = get_mock_git_instance()
170167

171168
with (
172169
patch("ddtrace.testing.internal.session_manager.Git", return_value=mock_git_instance) as mock_git_cls,

tests/testing/internal/test_env_tags.py

Lines changed: 1 addition & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -387,72 +387,12 @@ def test_github_actions_base_branch_head_sha_extracted_from_event(
387387
monkeypatch.setattr(os, "environ", ci_env)
388388
monkeypatch.chdir(git_repo)
389389

390-
with mock.patch("ddtrace.testing.internal.env_tags.get_pr_base_commit_sha", return_value=None):
391-
tags = get_env_tags()
390+
tags = get_env_tags()
392391

393392
assert tags[GitTag.PULL_REQUEST_BASE_BRANCH_HEAD_SHA] == base_branch_head_sha
394393
assert GitTag.PULL_REQUEST_BASE_BRANCH_SHA not in tags
395394

396395

397-
def test_github_actions_base_branch_sha_computed_as_merge_base(
398-
monkeypatch: pytest.MonkeyPatch, git_shallow_repo: tuple[str, str]
399-
) -> None:
400-
"""PULL_REQUEST_BASE_BRANCH_SHA is set to the merge-base of base_branch_head_sha and head_sha."""
401-
git_repo, head_sha = git_shallow_repo
402-
github_sha = "abcd1234"
403-
base_branch_head_sha = "aabbccdd" * 5
404-
expected_merge_base = "deadbeef" * 5
405-
406-
github_event_path = f"{git_repo}/event.json"
407-
with open(github_event_path, "w") as f:
408-
json.dump(
409-
{
410-
"pull_request": {
411-
"head": {"sha": head_sha},
412-
"base": {"sha": base_branch_head_sha},
413-
}
414-
},
415-
f,
416-
)
417-
418-
ci_env = {
419-
"GITHUB_SHA": github_sha,
420-
"GITHUB_EVENT_PATH": github_event_path,
421-
}
422-
423-
monkeypatch.setattr(os, "environ", ci_env)
424-
monkeypatch.chdir(git_repo)
425-
426-
with mock.patch(
427-
"ddtrace.testing.internal.env_tags.get_pr_base_commit_sha", return_value=expected_merge_base
428-
) as mock_merge_base:
429-
tags = get_env_tags()
430-
431-
mock_merge_base.assert_called_once_with(base_branch_head_sha, head_sha)
432-
assert tags[GitTag.PULL_REQUEST_BASE_BRANCH_SHA] == expected_merge_base
433-
434-
435-
def test_github_actions_base_branch_sha_not_overwritten_when_already_set(
436-
monkeypatch: pytest.MonkeyPatch, git_repo: str
437-
) -> None:
438-
"""PULL_REQUEST_BASE_BRANCH_SHA from DD_GIT_PULL_REQUEST_BASE_BRANCH_SHA is not overwritten by merge-base."""
439-
existing_base_sha = "cafebabe" * 5
440-
441-
ci_env = {
442-
"GITHUB_SHA": "abcd1234",
443-
"DD_GIT_PULL_REQUEST_BASE_BRANCH_SHA": existing_base_sha,
444-
}
445-
446-
monkeypatch.setattr(os, "environ", ci_env)
447-
monkeypatch.chdir(git_repo)
448-
449-
with mock.patch("ddtrace.testing.internal.env_tags.get_pr_base_commit_sha") as mock_merge_base:
450-
tags = get_env_tags()
451-
452-
mock_merge_base.assert_not_called()
453-
assert tags[GitTag.PULL_REQUEST_BASE_BRANCH_SHA] == existing_base_sha
454-
455-
456396
class TestCheckCommitShaConsistency:
457397
"""Tests for _check_commit_sha_consistency telemetry helper."""
458398

0 commit comments

Comments
 (0)