Skip to content

[GPU] Assert sort runs on GPU in tests #7767

[GPU] Assert sort runs on GPU in tests

[GPU] Assert sort runs on GPU in tests #7767

Workflow file for this run

# Pull-request CI: triggered by pull requests, or manually via workflow_dispatch.
name: PR CI
on:
  pull_request:
  workflow_dispatch:
    inputs:
      # Manual runs choose the OS of the build and test runners.
      runner_os:
        description: The OS of the runners that will build and test bodo.
        type: choice
        options:
          - ubuntu-latest
          - windows-latest
          - macos-latest
# One active run per workflow and branch: a newer run cancels the one in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}
jobs:
# Computes the Bodo version with setuptools_scm and exposes it to other jobs
# as the `bodo_version` job output.
get_version:
  runs-on: ubuntu-latest
  outputs:
    bodo_version: ${{ steps.get_version.outputs.bodo_version }}
  steps:
    - name: Checkout Code
      uses: actions/checkout@v5
      with:
        # For setuptools_scm to be able to determine version,
        # we need to fetch the entire history
        fetch-depth: 0
    - name: Get Version
      id: get_version
      run: |
        set -exo pipefail
        pip install setuptools_scm
        # Capture the version in a plain assignment so that a failing
        # setuptools_scm aborts the step under `set -e`. When the command
        # substitution sits inside an `echo` argument, its failure is masked
        # by echo's own exit status and an empty version would be published.
        bodo_version="$(python -m setuptools_scm)"
        # Print to the logs
        echo "bodo_version=${bodo_version}"
        echo "bodo_version=${bodo_version}" >> "$GITHUB_OUTPUT"
# 1) Validate changes and commit message
# Publishes the gating flags (run_tests, run_gpu, run_bodosql_customer_tests,
# not_docs, skip_flag) that the downstream jobs read via needs.validate.outputs.
validate:
  name: Validate
  runs-on: ubuntu-latest
  outputs:
    # Step outputs are strings, and every non-empty string (including 'false')
    # is truthy in an expression. The path-filter results are therefore
    # compared against 'true' explicitly instead of being used as booleans.
    # https://docs.github.com/en/actions/learn-github-actions/expressions#contains
    # contains is case-insensitive
    run_tests: ${{ (steps.changes.outputs.not_docs == 'true' && contains(steps.check_msg.outputs.commit_message, '[run ci]')) || github.event_name == 'workflow_dispatch' }}
    run_gpu: ${{ contains(github.event.pull_request.title, '[GPU]') || github.event_name == 'workflow_dispatch' }}
    run_bodosql_customer_tests: ${{ steps.changes.outputs.bodosql_customer_tests == 'true' || github.event_name == 'workflow_dispatch' }}
    not_docs: ${{ steps.changes.outputs.not_docs }}
    skip_flag: ${{ contains(steps.check_msg.outputs.commit_message, '[skip]') }}
  steps:
    - uses: actions/checkout@v5
      with:
        ref: ${{ github.ref }}
        fetch-depth: "0"
    - name: Validate Changes
      id: changes
      uses: dorny/paths-filter@v3
      with:
        filters: |
          bodosql_customer_tests:
            - 'BodoSQL/calcite_sql/**'
          not_docs:
            - '!docs/**'
    # Fetch the PR branch for the commit history
    - uses: actions/checkout@v5
      with:
        ref: ${{ github.event.pull_request.head.sha }}
        sparse-checkout: .
    - name: Check Commit Message
      id: check_msg
      run: |
        # `-o` is required for pipefail to be enabled; without it `set`
        # treats "pipefail" as a positional parameter.
        set -xeo pipefail
        # Assign first so a git failure stops the step instead of writing an
        # empty commit_message output.
        commit_message="$(git log -1 --pretty=format:'%s')"
        echo "commit_message=${commit_message}" >> "$GITHUB_OUTPUT"
# 2) Trigger BodoSQL Customer tests
bodosql-customer-tests:
  name: BodoSQL Customer Tests
  needs: validate
  runs-on: ubuntu-latest
  # Requires both the general test gate and the BodoSQL-specific gate.
  if: >-
    needs.validate.outputs.run_tests == 'true' &&
    needs.validate.outputs.run_bodosql_customer_tests == 'true'
  steps:
    # Triggers customer_bodosql_ci.yaml in bodo-ai/customer-sample-code and
    # passes this run's branch name in the payload.
    - uses: convictional/trigger-workflow-and-wait@v1.6.5
      with:
        owner: bodo-ai
        repo: customer-sample-code
        ref: master
        workflow_file_name: customer_bodosql_ci.yaml
        client_payload: '{ "branch" : "${{ github.head_ref || github.ref_name }}" }'
        github_token: ${{ secrets.BOT_HERMAN_GITHUB_TOKEN }}
# 3) Pre-Build Bodo to save build artifacts to sccache
compile-bodo:
  name: Pre-Build Bodo for Cache
  needs: validate
  if: needs.validate.outputs.run_tests == 'true'
  # Uses ubuntu-latest when no runner_os input is supplied.
  runs-on: ${{ inputs.runner_os || 'ubuntu-latest' }}
  permissions:
    contents: read
    id-token: write
  steps:
    - uses: actions/checkout@v5
    - name: Build from Source
      uses: ./.github/actions/build-source
      with:
        build-all: false
    # The Hadoop tarball is cached per runner OS; Windows runners skip it.
    - name: Load and Save Hadoop to Cache
      id: hadoop-cache
      if: runner.os != 'Windows'
      uses: actions/cache@v4
      with:
        key: hadoop-3.3.2-${{ runner.os }}
        path: hadoop.tar.gz
    # Download only when the cache step did not report a hit.
    - name: Download Hadoop for SAS
      if: runner.os != 'Windows' && steps.hadoop-cache.outputs.cache-hit != 'true'
      shell: bash
      run: |
        wget -O hadoop.tar.gz "https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=hadoop/common/hadoop-3.3.2/hadoop-3.3.2.tar.gz"
# 4) Actually run tests
pr-ci:
  name: Test Compiler
  needs:
    - compile-bodo
    - get_version
  strategy:
    # Don't cancel other jobs if one fails
    fail-fast: false
    matrix:
      batch: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
  uses: ./.github/workflows/_test_python_source.yml
  secrets: inherit
  with:
    test-type: NORMAL
    pytest-marker: 'not slow and not weekly or smoke'
    # One matrix entry per batch; total-batches equals the matrix length.
    batch: ${{ matrix.batch }}
    total-batches: 12
    collect-coverage: true
    runner-id: ${{ inputs.runner_os || 'ubuntu-latest' }}
    bodo-version: ${{ needs.get_version.outputs.bodo_version }}
# Runs the tests marked spawn_mode as a single batch through the reusable
# Python test workflow.
spawn-ci:
  name: Test Spawn
  needs:
    - compile-bodo
    - get_version
  uses: ./.github/workflows/_test_python_source.yml
  secrets: inherit
  with:
    test-type: SPAWN
    pytest-marker: 'spawn_mode'
    batch: 1
    total-batches: 1
    collect-coverage: true
    runner-id: ${{ inputs.runner_os || 'ubuntu-latest' }}
    bodo-version: ${{ needs.get_version.outputs.bodo_version }}
# DataFrame-library tests selected by the marker expression below
# (df_lib, excluding jit_dependency and weekly), run as a single batch.
df-lib-non-jit-ci:
  name: Test DF Library Non-JIT
  needs:
    - compile-bodo
    - get_version
  uses: ./.github/workflows/_test_python_source.yml
  secrets: inherit
  with:
    test-type: 'DF_LIB_NO_JIT'
    pytest-marker: 'df_lib and not jit_dependency and not weekly'
    batch: 1
    total-batches: 1
    collect-coverage: true
    runner-id: ${{ inputs.runner_os || 'ubuntu-latest' }}
    bodo-version: ${{ needs.get_version.outputs.bodo_version }}
# DataFrame-library tests that carry the jit_dependency marker, split across
# two matrix batches.
df-lib-ci:
  name: Test DF Library
  needs:
    - compile-bodo
    - get_version
  strategy:
    # Don't cancel other jobs if one fails
    fail-fast: false
    matrix:
      batch: [1, 2]
  uses: ./.github/workflows/_test_python_source.yml
  secrets: inherit
  with:
    test-type: 'DF_LIB'
    pytest-marker: 'df_lib and jit_dependency and not weekly'
    # total-batches equals the matrix length above.
    batch: ${{ matrix.batch }}
    total-batches: 2
    collect-coverage: true
    runner-id: ${{ inputs.runner_os || 'ubuntu-latest' }}
    bodo-version: ${{ needs.get_version.outputs.bodo_version }}
# Narwhals integration run with bodo-num-workers set to "1"; no pytest marker
# filter and no coverage collection.
narwhals-ci-single-worker:
  name: Test Narwhals Bodo Integration (1 Worker)
  needs:
    - compile-bodo
    - get_version
  uses: ./.github/workflows/_test_python_source.yml
  secrets: inherit
  with:
    test-type: NARWHALS
    bodo-num-workers: '1'
    pytest-marker: ''
    batch: 1
    total-batches: 1
    collect-coverage: false
    runner-id: ${{ inputs.runner_os || 'ubuntu-latest' }}
    bodo-version: ${{ needs.get_version.outputs.bodo_version }}
# Narwhals integration run that does not set bodo-num-workers; no pytest
# marker filter and no coverage collection.
narwhals-ci:
  name: Test Narwhals Bodo Integration (Multiple Worker)
  needs:
    - compile-bodo
    - get_version
  uses: ./.github/workflows/_test_python_source.yml
  secrets: inherit
  with:
    test-type: NARWHALS
    pytest-marker: ''
    batch: 1
    total-batches: 1
    collect-coverage: false
    runner-id: ${{ inputs.runner_os || 'ubuntu-latest' }}
    bodo-version: ${{ needs.get_version.outputs.bodo_version }}
# Tests marked gpu, sent to the self-hosted-multi-gpu runner id. Gated on both
# the run_tests and run_gpu outputs of the validate job.
gpu-ci:
  name: Test DF Library Multi-GPU
  needs:
    - validate
    - get_version
  if: >-
    needs.validate.outputs.run_tests == 'true' &&
    needs.validate.outputs.run_gpu == 'true'
  uses: ./.github/workflows/_test_python_source.yml
  secrets: inherit
  with:
    test-type: 'DF_LIB_GPU'
    pytest-marker: 'gpu'
    batch: 1
    total-batches: 1
    collect-coverage: true
    runner-id: 'self-hosted-multi-gpu'
    bodo-version: ${{ needs.get_version.outputs.bodo_version }}
# Tests marked bodosql_cpp (excluding weekly), run as a single batch.
bodosql-cpp-ci:
  name: Test BodoSQL C++ Backend
  needs:
    - compile-bodo
    - get_version
  uses: ./.github/workflows/_test_python_source.yml
  secrets: inherit
  with:
    test-type: 'BODOSQL_CPP'
    pytest-marker: 'bodosql_cpp and not weekly'
    batch: 1
    total-batches: 1
    collect-coverage: true
    runner-id: ${{ inputs.runner_os || 'ubuntu-latest' }}
    bodo-version: ${{ needs.get_version.outputs.bodo_version }}
# Java tests via the reusable Java workflow; gated on the same two validate
# outputs as the BodoSQL customer tests.
java-ci:
  name: Test Java
  needs: validate
  if: >-
    needs.validate.outputs.run_tests == 'true' &&
    needs.validate.outputs.run_bodosql_customer_tests == 'true'
  uses: ./.github/workflows/_test_java_source.yml
# 5) Collect and combine any results from runs
# Runs after pr-ci whether it succeeded or failed, merges the per-batch timing
# and coverage files, uploads them, and finally fails if pr-ci did not succeed.
collect-results:
  needs: [pr-ci]
  name: Collect Results
  runs-on: ubuntu-latest
  if: success() || failure()
  steps:
    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        # Quoted so YAML keeps the version as a string, not a float
        # (an unquoted 3.10, for example, would be read as 3.1).
        python-version: "3.14"
    # Get all source code for mapping coverage to LOC
    - uses: actions/checkout@v5
      with:
        # Disable shallow clones for better coverage reporting
        fetch-depth: 0
    # Download timing and coverage info
    - name: Download Timings
      uses: actions/download-artifact@v5
      with:
        merge-multiple: true
    # Merging
    - name: Merge Files
      run: |
        set -exo pipefail
        # -p: do not fail if the directory already exists
        mkdir -p outputs
        python -m pip install pytest-split coverage pytest-cov
        # -S to sort by key, -s to read as arrays, 'add' to merge
        jq -S -s 'add' test_dur_bodo_*.json > outputs/test_dur_bodo.json
        jq -S -s 'add' test_dur_bodosql_*.json > outputs/test_dur_bodosql.json
        # Trailing \n keeps each heading on its own log line
        printf "Slowest Bodo Tests\n"
        slowest-tests --durations-path outputs/test_dur_bodo.json -c 20
        printf "Slowest BodoSQL Tests\n"
        slowest-tests --durations-path outputs/test_dur_bodosql.json -c 20
        coverage combine .coverage_*
        coverage report
        coverage xml -i --omit bodo/runtests.py
        cp coverage.xml outputs/coverage.xml
        cp .coverage outputs/.coverage
    - name: Upload Combined
      uses: actions/upload-artifact@v4
      with:
        name: test-durations
        path: outputs/
        # outputs/.coverage is a dotfile and must be included explicitly
        include-hidden-files: true
    - name: Upload coverage reports to Codecov
      uses: codecov/codecov-action@v5
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        slug: bodo-ai/Bodo
    - name: Upload test results to Codecov
      if: ${{ !cancelled() }}
      uses: codecov/test-results-action@v1
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
    # Propagate a pr-ci failure so this job does not report success
    - name: Check Test Runs
      if: ${{ needs.pr-ci.result != 'success' }}
      run: exit 1
# Notify GitHub Checks
# This job is the single gate for merging: it should fail whenever any of the
# test jobs it depends on failed, which blocks the PR from being merged.
# Referenced blogpost: https://emmer.dev/blog/skippable-github-status-checks-aren-t-really-required/
status-check:
  name: Status Check
  runs-on: ubuntu-latest
  # Evaluate the gate even when upstream jobs failed or were skipped.
  if: always()
  needs: [collect-results, df-lib-ci, df-lib-non-jit-ci, bodosql-cpp-ci, gpu-ci, validate]
  steps:
    # Enforcement is waived when only docs files changed or the [skip] flag was set.
    - name: Skip check for docs-only PR
      if: needs.validate.outputs.not_docs != 'true' || needs.validate.outputs.skip_flag == 'true'
      run: echo "Docs-only PR, skipping status enforcement."
    # In every other case all needed jobs must pass; gpu-ci may be skipped
    # only when run_gpu is not 'true'.
    - name: Fail if needed jobs failed or were skipped
      if: needs.validate.outputs.not_docs == 'true' && needs.validate.outputs.skip_flag != 'true'
      uses: re-actors/alls-green@release/v1
      with:
        jobs: ${{ toJSON(needs) }}
        allowed-skips: ${{ needs.validate.outputs.run_gpu != 'true' && 'gpu-ci' || '' }}