Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions ddtrace/testing/internal/env_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from ddtrace.testing.internal.ci import CITag
from ddtrace.testing.internal.constants import DD_TEST_OPTIMIZATION_ENV_DATA_FILE
from ddtrace.testing.internal.git import GitTag
from ddtrace.testing.internal.git import get_pr_base_commit_sha
from ddtrace.testing.internal.git import get_workspace_path
from ddtrace.testing.internal.offline_mode import get_offline_mode
from ddtrace.testing.internal.offline_mode import resolve_rlocation
Expand Down Expand Up @@ -127,17 +126,6 @@ def get_env_tags() -> dict[str, str]:
if head_sha := tags.get(GitTag.COMMIT_HEAD_SHA):
merge_tags(tags, git.get_git_head_tags_from_git_command(head_sha))

# For GitHub Actions: pull_request.base.sha is the HEAD of the base branch, not the merge base.
# Compute the true base commit SHA via `git merge-base` when it hasn't already been set by the
# CI provider (e.g. GitLab sets CI_MERGE_REQUEST_DIFF_BASE_SHA directly).
if not tags.get(GitTag.PULL_REQUEST_BASE_BRANCH_SHA):
base_branch_head_sha = tags.get(GitTag.PULL_REQUEST_BASE_BRANCH_HEAD_SHA)
pr_head_sha = tags.get(GitTag.COMMIT_HEAD_SHA)
if base_branch_head_sha and pr_head_sha:
merge_base_sha = get_pr_base_commit_sha(base_branch_head_sha, pr_head_sha)
if merge_base_sha:
tags[GitTag.PULL_REQUEST_BASE_BRANCH_SHA] = merge_base_sha

normalize_git_tags(tags)

if workspace_path := tags.get(CITag.WORKSPACE_PATH):
Expand Down
86 changes: 69 additions & 17 deletions ddtrace/testing/internal/git.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from dataclasses import dataclass
import logging
import os
from pathlib import Path
import random
import re
import shutil
import subprocess # nosec: B404
import tempfile
import time
import typing as t

from ddtrace.testing.internal.telemetry import GitTelemetry
Expand All @@ -15,6 +17,12 @@

log = logging.getLogger(__name__)

# Matches git lock-contention errors, e.g.:
# fatal: Unable to create '/path/.git/shallow.lock': File exists.
_LOCK_ERROR_RE = re.compile(r"Unable to create .+\.lock.+File exists", re.DOTALL)
_LOCK_MAX_RETRIES = 5
_LOCK_BASE_DELAY_SECONDS = 0.5


class GitTag:
# Git Repository URL
Expand Down Expand Up @@ -120,6 +128,10 @@ def _call_git(self, args: list[str], input_string: t.Optional[str] = None) -> _G
git_cmd = [self.git_command, *args]
log.debug("Running git command: %r", git_cmd)

# Force C locale so git's error messages are predictable; the lock-contention
# regex in _call_git_with_lock_retry matches the English "File exists" string.
git_env = {**os.environ, "LC_ALL": "C", "LANG": "C"}

with StopWatch() as sw:
process = subprocess.Popen( # nosec: B603
git_cmd,
Expand All @@ -129,6 +141,7 @@ def _call_git(self, args: list[str], input_string: t.Optional[str] = None) -> _G
cwd=self.cwd,
encoding="utf-8",
errors="surrogateescape",
env=git_env,
)
stdout, stderr = process.communicate(input=input_string)

Expand All @@ -150,6 +163,45 @@ def _git_output(self, args: list[str], telemetry_type: t.Optional[GitTelemetry]
return ""
return result.stdout

def _call_git_with_lock_retry(
self,
args: list[str],
input_string: t.Optional[str] = None,
should_retry: t.Optional[t.Callable[[], bool]] = None,
) -> _GitSubprocessDetails:
"""Call git with automatic retry on lock-file contention errors.

In multi-process test environments several workers may issue git
commands simultaneously. Commands that write a lock file (e.g.
``git fetch --update-shallow``) fail with "File exists" when another
process holds the lock. This helper retries such transient failures
with exponential back-off so callers never need to handle the retry
logic themselves.

``should_retry`` is called after each back-off sleep; returning False
aborts the retry loop so callers can stop once another process has
completed the same work (e.g. an xdist sibling that finished
unshallowing while we were waiting on the lock).
"""
result = self._call_git(args, input_string)
for attempt in range(_LOCK_MAX_RETRIES):
if result.return_code == 0 or not _LOCK_ERROR_RE.search(result.stderr):
break
delay = _LOCK_BASE_DELAY_SECONDS * (2**attempt) + random.uniform(0, 1) # nosec: B311
log.debug(
"Git lock contention (attempt %d/%d), retrying in %.1fs: %s",
attempt + 1,
_LOCK_MAX_RETRIES,
delay,
result.stderr,
)
time.sleep(delay)
if should_retry is not None and not should_retry():
log.debug("Git lock retry aborted: work no longer needed")
break
result = self._call_git(args, input_string)
return result

def get_git_version(self) -> tuple[int, ...]:
output = self._git_output(["--version"]) # "git version 1.2.3"
try:
Expand Down Expand Up @@ -232,6 +284,12 @@ def is_shallow_repository(self) -> bool:
return output == "true"

def unshallow_repository(self, refspec: t.Optional[str] = None, parent_only: bool = False) -> _GitSubprocessDetails:
# If another process (e.g. a parallel xdist worker) has already unshallowed the
# repository, skip the redundant fetch.
if not self.is_shallow_repository():
log.debug("Repository is no longer shallow, skipping unshallow")
return _GitSubprocessDetails(stdout="", stderr="", return_code=0, elapsed_seconds=0.0)

remote_name = self.get_remote_name()
command = [
"fetch",
Expand All @@ -245,8 +303,13 @@ def unshallow_repository(self, refspec: t.Optional[str] = None, parent_only: boo
if refspec:
command.append(refspec)

result = self._call_git(command)
result = self._call_git_with_lock_retry(command, should_retry=self.is_shallow_repository)
if result.return_code != 0:
# The retry loop may have aborted because a sibling worker finished
# unshallowing in the meantime; in that case treat the operation as a success.
if not self.is_shallow_repository():
log.debug("Repository was unshallowed by another process during retry")
return _GitSubprocessDetails(stdout="", stderr="", return_code=0, elapsed_seconds=0.0)
log.warning("Error unshallowing repo for refspec %s: %s", refspec, result.stderr)
return result

Expand Down Expand Up @@ -294,7 +357,11 @@ def try_all_unshallow_repository_methods(self) -> bool:

def get_merge_base(self, sha1: str, sha2: str) -> str:
"""Return the best common ancestor commit SHA of sha1 and sha2."""
return self._git_output(["merge-base", sha1, sha2])
result = self._call_git(["merge-base", sha1, sha2])
if result.return_code != 0:
log.warning("Error getting merge-base for %s and %s: %s", sha1, sha2, result.stderr)
return ""
return result.stdout

def pack_objects(self, revisions: list[str]) -> t.Iterable[Path]:
base_name = str(random.randint(1, 1000000)) # nosec: B311
Expand Down Expand Up @@ -324,21 +391,6 @@ def pack_objects(self, revisions: list[str]) -> t.Iterable[Path]:
yield packfile


def get_pr_base_commit_sha(base_branch_head_sha: str, head_sha: str) -> t.Optional[str]:
"""Compute the true PR base commit SHA as the merge base of base_branch_head_sha and head_sha.

On GitHub Actions, pull_request.base.sha is the HEAD of the base branch, not the actual merge
base commit. This function uses `git merge-base` to find the common ancestor.
"""
try:
git = Git()
except RuntimeError as e:
log.warning("Error getting git data for merge-base computation: %s", e)
return None

return git.get_merge_base(base_branch_head_sha, head_sha) or None


def get_git_tags_from_git_command() -> dict[str, t.Optional[str]]:
try:
git = Git()
Expand Down
50 changes: 36 additions & 14 deletions ddtrace/testing/internal/session_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,20 @@ def _get_test_session_name(self) -> str:

return self.session.test_command

def _update_pr_merge_base(self, git: Git) -> None:
"""Compute PULL_REQUEST_BASE_BRANCH_SHA via git merge-base if not already set.

Called from upload_git_data() after any unshallowing so the commits are
guaranteed to be present in the local history.
"""
if self.env_tags.get(GitTag.PULL_REQUEST_BASE_BRANCH_SHA):
return
base_sha = self.env_tags.get(GitTag.PULL_REQUEST_BASE_BRANCH_HEAD_SHA)
head_sha = self.env_tags.get(GitTag.COMMIT_HEAD_SHA)
if base_sha and head_sha:
if merge_base := git.get_merge_base(base_sha, head_sha):
self.env_tags[GitTag.PULL_REQUEST_BASE_BRANCH_SHA] = merge_base

def upload_git_data(self) -> None:
# NOTE: In manifest mode (Bazel sandbox), git commands are unavailable
# and the external tool has already uploaded git pack data before the sandbox
Expand Down Expand Up @@ -438,26 +452,34 @@ def upload_git_data(self) -> None:

commits_not_in_backend = list(set(latest_commits) - set(backend_commits))

if len(commits_not_in_backend) == 0:
log.debug("All latest commits found in backend, skipping metadata upload")
return

if git.is_shallow_repository() and git.get_git_version() >= (2, 27, 0):
log.debug("Shallow repository detected on git > 2.27 detected, unshallowing")
unshallow_successful = git.try_all_unshallow_repository_methods()
if unshallow_successful:
log.debug("Unshallow successful, getting latest commits from backend based on unshallowed commits")
latest_commits = git.get_latest_commits()
backend_commits = self.api_client.get_known_commits(latest_commits)
if backend_commits is None:
log.warning("search_commits failed after unshallow, aborting git metadata upload")
TelemetryAPI.get().record_git_pack_data(0, 0)
return

commits_not_in_backend = list(set(latest_commits) - set(backend_commits))
else:
if len(commits_not_in_backend) > 0:
log.debug("Unshallow successful, getting latest commits from backend based on unshallowed commits")
latest_commits = git.get_latest_commits()
backend_commits = self.api_client.get_known_commits(latest_commits)
if backend_commits is None:
log.warning("search_commits failed after unshallow, aborting git metadata upload")
TelemetryAPI.get().record_git_pack_data(0, 0)
return

commits_not_in_backend = list(set(latest_commits) - set(backend_commits))
elif len(commits_not_in_backend) > 0:
log.warning("Failed to unshallow repository, continuing to send pack data")

# Compute the true PR merge-base now that the repo has been unshallowed (if needed).
# This is done here rather than at env_tags collection time because on shallow clones the
# required commits are only available after the fetch above completes.
# Must run before the early-return below so sessions where all commits are already
# known to the backend still populate git.pull_request.base_branch_sha.
self._update_pr_merge_base(git)
Comment thread
gnufede marked this conversation as resolved.

if len(commits_not_in_backend) == 0:
log.debug("All latest commits found in backend, skipping metadata upload")
return

revisions_to_send = git.get_filtered_revisions(
excluded_commits=backend_commits, included_commits=commits_not_in_backend
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
fixes:
- |
CI Visibility: Fixes spurious ``git`` warnings when running tests with pytest-xdist.
Simultaneous ``git fetch --update-shallow`` calls now retry with exponential back-off
on ``.git/shallow.lock`` contention, skip the fetch entirely once a sibling worker has
already unshallowed the repository, and run under the ``C`` locale so the contention
check is robust against non-English git messages. ``git merge-base`` is deferred until
after unshallowing so the required commits are available locally.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import os
from pathlib import Path
from unittest.mock import Mock
from unittest.mock import patch

# Path is also used in test_env_tags_non_empty_in_online_mode for get_workspace_path mock
Expand All @@ -16,6 +15,7 @@
from ddtrace.testing.internal.session_manager import SessionManager
from ddtrace.testing.internal.test_data import TestSession
from tests.testing.mocks import MockDefaults
from tests.testing.mocks import get_mock_git_instance
from tests.testing.mocks import mock_api_client_settings


Expand Down Expand Up @@ -163,10 +163,7 @@ def test_git_upload_proceeds_in_online_mode(self, monkeypatch, tmp_path):

sm = self._build_sm_with_mocked_api(monkeypatch, env)

mock_git_instance = Mock()
mock_git_instance.get_latest_commits.return_value = []
mock_git_instance.get_filtered_revisions.return_value = []
mock_git_instance.pack_objects.return_value = iter([])
mock_git_instance = get_mock_git_instance()

with (
patch("ddtrace.testing.internal.session_manager.Git", return_value=mock_git_instance) as mock_git_cls,
Expand Down
62 changes: 1 addition & 61 deletions tests/testing/internal/test_env_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,72 +387,12 @@ def test_github_actions_base_branch_head_sha_extracted_from_event(
monkeypatch.setattr(os, "environ", ci_env)
monkeypatch.chdir(git_repo)

with mock.patch("ddtrace.testing.internal.env_tags.get_pr_base_commit_sha", return_value=None):
tags = get_env_tags()
tags = get_env_tags()

assert tags[GitTag.PULL_REQUEST_BASE_BRANCH_HEAD_SHA] == base_branch_head_sha
assert GitTag.PULL_REQUEST_BASE_BRANCH_SHA not in tags


def test_github_actions_base_branch_sha_computed_as_merge_base(
monkeypatch: pytest.MonkeyPatch, git_shallow_repo: tuple[str, str]
) -> None:
"""PULL_REQUEST_BASE_BRANCH_SHA is set to the merge-base of base_branch_head_sha and head_sha."""
git_repo, head_sha = git_shallow_repo
github_sha = "abcd1234"
base_branch_head_sha = "aabbccdd" * 5
expected_merge_base = "deadbeef" * 5

github_event_path = f"{git_repo}/event.json"
with open(github_event_path, "w") as f:
json.dump(
{
"pull_request": {
"head": {"sha": head_sha},
"base": {"sha": base_branch_head_sha},
}
},
f,
)

ci_env = {
"GITHUB_SHA": github_sha,
"GITHUB_EVENT_PATH": github_event_path,
}

monkeypatch.setattr(os, "environ", ci_env)
monkeypatch.chdir(git_repo)

with mock.patch(
"ddtrace.testing.internal.env_tags.get_pr_base_commit_sha", return_value=expected_merge_base
) as mock_merge_base:
tags = get_env_tags()

mock_merge_base.assert_called_once_with(base_branch_head_sha, head_sha)
assert tags[GitTag.PULL_REQUEST_BASE_BRANCH_SHA] == expected_merge_base


def test_github_actions_base_branch_sha_not_overwritten_when_already_set(
monkeypatch: pytest.MonkeyPatch, git_repo: str
) -> None:
"""PULL_REQUEST_BASE_BRANCH_SHA from DD_GIT_PULL_REQUEST_BASE_BRANCH_SHA is not overwritten by merge-base."""
existing_base_sha = "cafebabe" * 5

ci_env = {
"GITHUB_SHA": "abcd1234",
"DD_GIT_PULL_REQUEST_BASE_BRANCH_SHA": existing_base_sha,
}

monkeypatch.setattr(os, "environ", ci_env)
monkeypatch.chdir(git_repo)

with mock.patch("ddtrace.testing.internal.env_tags.get_pr_base_commit_sha") as mock_merge_base:
tags = get_env_tags()

mock_merge_base.assert_not_called()
assert tags[GitTag.PULL_REQUEST_BASE_BRANCH_SHA] == existing_base_sha


class TestCheckCommitShaConsistency:
"""Tests for _check_commit_sha_consistency telemetry helper."""

Expand Down
Loading
Loading