Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
344 changes: 344 additions & 0 deletions .github/workflows/ephemeral-env.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,344 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0

name: Ephemeral Environment

# Reacts to label changes, new commits and closure of pull requests; the jobs
# below decide per event whether to deploy or tear down.
on:
  pull_request:
    types: [labeled, unlabeled, closed, synchronize]

# One run per PR at a time. Concurrency is evaluated for the whole workflow
# run, before any job-level `if:` is considered, so with an unconditional
# `cancel-in-progress: true` a run triggered by an unrelated event (for
# example adding some other label, or a push while `deploy-env` is absent)
# would cancel an in-progress deploy or teardown and then skip every job.
# Only events that will actually deploy or tear down may cancel the run in
# progress; any other run waits its turn and finishes as a no-op.
# `github.event.label` only exists on labeled/unlabeled events, so the second
# clause covers both of them.
concurrency:
  group: ephemeral-env-pr-${{ github.event.pull_request.number }}
  cancel-in-progress: >-
    ${{ github.event.action == 'closed'
    || github.event.label.name == 'deploy-env'
    || (github.event.action == 'synchronize' && contains(github.event.pull_request.labels.*.name, 'deploy-env')) }}

jobs:
#######################
# Deploy Environment #
#######################
deploy:
  # Runs only for open, non-fork PRs targeting `main`, and only when the
  # `deploy-env` label was just added or a new commit arrived while the label
  # is present.
  if: |
    github.event.pull_request.state == 'open' &&
    github.event.pull_request.base.ref == 'main' &&
    github.event.pull_request.head.repo.fork == false &&
    (
      (github.event.action == 'synchronize' && contains(github.event.pull_request.labels.*.name, 'deploy-env')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'deploy-env')
    )
  runs-on: self-hosted
  permissions:
    contents: read
    pull-requests: write
  # The environment URL is filled in from the output of the `get-ip` step.
  environment:
    name: ephemeral-env
    url: http://${{ steps.get-ip.outputs.host_ip }}:8080
  steps:
# Check out the exact head commit of the PR, with Git LFS enabled.
- name: Checkout PR branch
  uses: actions/checkout@v4
  with:
    lfs: true
    ref: ${{ github.event.pull_request.head.sha }}

# Set up Bazel through the setup-bazel action (pinned to a commit SHA),
# requesting Bazelisk 1.27.0 with the Bazelisk cache enabled.
- name: Setup Bazel
  uses: bazel-contrib/setup-bazel@4fd964a13a440a8aeb0be47350db2fc640f19ca8
  with:
    bazelisk-version: '1.27.0'
    bazelisk-cache: true

# Stop whatever a previous deploy of this PR left running so the new deploy
# starts clean. Everything here is best-effort: a missing PID file or an
# already-dead process is not an error.
- name: Cleanup existing environment (if any)
  run: |
    export PR_NUMBER=${{ github.event.pull_request.number }}
    STATE_DIR="/tmp/osmo-pr-${PR_NUMBER}"
    echo "Checking for existing environment for PR #${PR_NUMBER}..."

    # Stop the service and backend using the PIDs saved by the start steps.
    for component in service backend; do
      pid_file="${STATE_DIR}/${component}.pid"
      [ -f "$pid_file" ] || continue

      pid=$(cat "$pid_file")
      # Skip an empty or corrupt PID file instead of handing junk to kill.
      case "$pid" in
        ''|*[!0-9]*)
          echo "Ignoring invalid PID file: $pid_file"
          continue
          ;;
      esac

      echo "Stopping existing ${component} process (PID: $pid)..."
      # Children first: once the parent has exited its children are
      # re-parented and `pkill -P` can no longer find them.
      pkill -P "$pid" 2>/dev/null || true
      kill "$pid" 2>/dev/null || true
    done

    # Fallback: stop any remaining bazel-run processes
    pkill -f "bazel.*run.*osmo" || true

    # Give processes time to stop
    sleep 5

    echo "✅ Cleanup completed"

# Publish the runner's 10.x.x.x address as the `host_ip` step output and make
# sure the per-PR log directory exists.
- name: Get Host IP
  id: get-ip
  run: |
    # Take the first IPv4 address inside 10.0.0.0/8. The match is anchored on
    # the address field with a literal dot; an unanchored `grep 10.` treats
    # the dot as "any character" and also hits addresses such as
    # 192.168.110.5 or lines whose netmask/broadcast column contains "10".
    HOST_IP=$(ifconfig | awk '$1 == "inet" && $2 ~ /^10\./ { print $2; exit }')

    # Fail here rather than letting later steps poll http://:8080.
    if [ -z "$HOST_IP" ]; then
      echo "❌ Could not find a 10.x.x.x address on this runner"
      exit 1
    fi

    echo "host_ip=$HOST_IP" >> "$GITHUB_OUTPUT"
    echo "Host IP: $HOST_IP"

    # Create/recreate log directory for this PR
    PR_NUMBER=${{ github.event.pull_request.number }}
    mkdir -p "/tmp/osmo-pr-${PR_NUMBER}"
    echo "Created log directory: /tmp/osmo-pr-${PR_NUMBER}"

# Launch the OSMO services with `bazel run` in the background, sending all
# output to service.log and recording the launcher PID in service.pid.
# NOTE(review): self-hosted runners are known to terminate processes left
# over from a job when it finishes — confirm this process outlives the job
# (a common workaround is clearing RUNNER_TRACKING_ID for the launched
# command).
- name: Start OSMO Services (Background)
  env:
    HOST_IP: ${{ steps.get-ip.outputs.host_ip }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
  run: |
    run_dir="/tmp/osmo-pr-${PR_NUMBER}"

    echo "Starting OSMO services in background..."
    nohup bazel run @osmo_workspace//run:start_service -- --mode bazel --log-level=debug \
      > "${run_dir}/service.log" 2>&1 &

    # $! is the PID of the job that was just put in the background.
    service_pid=$!
    echo "${service_pid}" > "${run_dir}/service.pid"
    echo "Started OSMO services with PID: ${service_pid}"
    echo "Logs: ${run_dir}/service.log"

# Poll the service URL (up to 60 attempts, 10 s apart) until it answers with
# HTTP 200 or 302. Fails early if the process recorded in service.pid has
# exited, and fails after the last attempt otherwise; both failure paths
# print the tail of the service log.
- name: Wait for OSMO Services to be ready
  run: |
    HOST_IP=${{ steps.get-ip.outputs.host_ip }}
    PR_NUMBER=${{ github.event.pull_request.number }}
    RUN_DIR="/tmp/osmo-pr-${PR_NUMBER}"

    echo "Waiting for OSMO services to be ready at http://$HOST_IP:8080..."
    max_attempts=60
    attempt=0

    while [ "$attempt" -lt "$max_attempts" ]; do
      # Check if the process is still running
      if [ -f "${RUN_DIR}/service.pid" ]; then
        SERVICE_PID=$(cat "${RUN_DIR}/service.pid")
        if ! kill -0 "$SERVICE_PID" 2>/dev/null; then
          echo "❌ Service process died unexpectedly!"
          echo "Last 50 lines of service log:"
          tail -50 "${RUN_DIR}/service.log"
          exit 1
        fi
      fi

      # Check if service is responding.
      # curl itself prints 000 for %{http_code} when the request fails, so
      # appending a fallback "000" would produce "000000". Swallow the exit
      # status instead and default only when nothing was printed.
      # --max-time keeps a connection that never answers from stalling the
      # loop past its attempt budget.
      HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "http://$HOST_IP:8080" 2>/dev/null || true)
      HTTP_CODE=${HTTP_CODE:-000}
      if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "302" ]; then
        echo "✅ OSMO services are ready! (HTTP $HTTP_CODE)"
        exit 0
      fi

      attempt=$((attempt + 1))
      echo "Attempt $attempt/$max_attempts - HTTP $HTTP_CODE - waiting..."
      sleep 10
    done

    echo "❌ OSMO services did not become ready in time"
    echo "Last 50 lines of service log:"
    tail -50 "${RUN_DIR}/service.log"
    exit 1

# Launch the OSMO backend with `bazel run` in the background, record its PID
# in backend.pid, then check after 10 s that the process is still alive.
# NOTE(review): self-hosted runners are known to terminate processes left
# over from a job when it finishes — confirm this process outlives the job.
- name: Start OSMO Backend (Background)
  env:
    HOST_IP: ${{ steps.get-ip.outputs.host_ip }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
  run: |
    run_dir="/tmp/osmo-pr-${PR_NUMBER}"

    echo "Starting OSMO backend in background..."
    nohup bazel run @osmo_workspace//run:start_backend -- --mode bazel --log-level=debug \
      > "${run_dir}/backend.log" 2>&1 &

    # $! is the PID of the job that was just put in the background.
    backend_pid=$!
    echo "${backend_pid}" > "${run_dir}/backend.pid"
    echo "Started OSMO backend with PID: ${backend_pid}"
    echo "Logs: ${run_dir}/backend.log"

    # Give backend a moment to start
    sleep 10

    # kill -0 sends no signal; it only reports whether the PID exists.
    if kill -0 "${backend_pid}" 2>/dev/null; then
      echo "✅ Backend process is running"
    else
      echo "❌ Backend process died unexpectedly!"
      echo "Last 50 lines of backend log:"
      tail -50 "${run_dir}/backend.log"
      exit 1
    fi

# Run the update_configs target in the foreground with HOST_IP exported.
- name: Update Configuration
  env:
    HOST_IP: ${{ steps.get-ip.outputs.host_ip }}
  run: bazel run @osmo_workspace//run:update_configs -- --mode bazel

# Confirm that the service and backend launchers recorded in the PID files
# are still alive, printing their process details. The step fails if either
# is gone, so a dead environment is not announced as ready by the later
# PR comment; this matches the liveness checks in the start/wait steps.
- name: Verify processes are running
  run: |
    PR_NUMBER=${{ github.event.pull_request.number }}
    echo "Checking process status..."

    # check_process <label> <pid-file-stem>: print the process line for the
    # saved PID; return non-zero when the process no longer exists.
    check_process() {
      label=$1
      pid=$(cat "/tmp/osmo-pr-${PR_NUMBER}/$2.pid")
      echo "${label} PID: ${pid}"
      if ! ps -p "${pid}" -o pid,ppid,comm,args; then
        echo "❌ ${label} process not found!"
        return 1
      fi
    }

    # Check both before failing so the log shows the state of each.
    failed=0
    check_process Service service || failed=1
    check_process Backend backend || failed=1
    exit "$failed"

# Log in through the CLI and, when the sample workflow file exists in the
# checkout, submit it as a smoke test.
# NOTE(review): login targets port 8000 while the environment URL and the
# readiness probe use 8080 — presumably two different endpoints; confirm.
- name: Test environment with hello_world workflow
  env:
    HOST_IP: ${{ steps.get-ip.outputs.host_ip }}
  run: |
    workflow_file="workflows/basics/hello_world/hello_world.yaml"

    # Login to OSMO
    bazel run @osmo_workspace//src/cli -- login "http://${HOST_IP}:8000" --method=dev --username=testuser

    # Submit test workflow (if available)
    if [ ! -f "${workflow_file}" ]; then
      echo "Test workflow not found, skipping validation"
      exit 0
    fi

    echo "Submitting test workflow..."
    bazel run @osmo_workspace//src/cli -- workflow submit "${workflow_file}"

# Post a PR comment with the environment URL and login command. On a
# `synchronize` event the comment is worded as a redeployment and names the
# short commit SHA.
- name: Comment on PR with environment details
  uses: actions/github-script@v7
  with:
    script: |
      const { action, pull_request: pr } = context.payload;
      const hostIp = '${{ steps.get-ip.outputs.host_ip }}';
      const envUrl = `http://${hostIp}:8080`;
      const shortSha = pr.head.sha.substring(0, 7);
      const redeployed = action === 'synchronize';

      const heading = redeployed
        ? '🔄 Ephemeral Environment Redeployed'
        : '🚀 Ephemeral Environment Ready';
      // Empty on a first deploy, which leaves a blank line in the comment.
      const note = redeployed ? `Updated to commit \`${shortSha}\`` : '';

      const body = [
        `## ${heading}`,
        '',
        `**Access**: ${envUrl}`,
        note,
        '',
        '**Login**:',
        '```bash',
        `bazel run @osmo_workspace//src/cli -- login http://${hostIp}:8000 --method=dev --username=testuser`,
        '```',
        '',
        'Remove `deploy-env` label to tear down.',
      ].join('\n');

      // context.repo supplies { owner, repo } for the current repository.
      await github.rest.issues.createComment({
        ...context.repo,
        issue_number: pr.number,
        body,
      });

# When any earlier step of this job failed, post a PR comment linking to the
# workflow run.
- name: Handle deployment failure
  if: failure()
  uses: actions/github-script@v7
  with:
    script: |
      const { pull_request: pr, repository } = context.payload;
      const runUrl = `${repository.html_url}/actions/runs/${context.runId}`;

      const body =
        '## ❌ Ephemeral Environment Deployment Failed\n\n' +
        `Check [workflow logs](${runUrl}) for details.`;

      // context.repo supplies { owner, repo } for the current repository.
      await github.rest.issues.createComment({
        ...context.repo,
        issue_number: pr.number,
        body,
      });

#######################
# Cleanup Environment #
#######################
cleanup:
  # Runs for non-fork PRs targeting `main` when the `deploy-env` label is
  # removed or the PR is closed.
  if: |
    github.event.pull_request.base.ref == 'main' &&
    github.event.pull_request.head.repo.fork == false &&
    (
      (github.event.action == 'closed') ||
      (github.event.action == 'unlabeled' && github.event.label.name == 'deploy-env')
    )
  runs-on: self-hosted
  permissions:
    contents: read
    pull-requests: write
  steps:
# Check out the exact head commit of the PR, with Git LFS enabled.
# NOTE(review): none of the visible steps in this job read files from the
# checkout — confirm that it (and the LFS download) is needed for teardown.
- name: Checkout PR branch
  uses: actions/checkout@v4
  with:
    lfs: true
    ref: ${{ github.event.pull_request.head.sha }}

# Tear down everything the deploy job started for this PR: the service and
# backend processes, related docker containers, and the per-PR log directory.
# Every command is best-effort so one missing piece does not abort teardown.
- name: Stop OSMO services
  run: |
    export PR_NUMBER=${{ github.event.pull_request.number }}
    STATE_DIR="/tmp/osmo-pr-${PR_NUMBER}"
    echo "Stopping OSMO services for PR #${PR_NUMBER}..."

    # Stop the service and backend using the PIDs saved by the deploy job.
    for component in service backend; do
      pid_file="${STATE_DIR}/${component}.pid"
      [ -f "$pid_file" ] || continue

      pid=$(cat "$pid_file")
      # Skip an empty or corrupt PID file instead of handing junk to kill.
      case "$pid" in
        ''|*[!0-9]*)
          echo "Ignoring invalid PID file: $pid_file"
          continue
          ;;
      esac

      echo "Stopping ${component} process (PID: $pid)..."
      # Children first: once the parent has exited its children are
      # re-parented and `pkill -P` can no longer find them.
      pkill -P "$pid" 2>/dev/null || true
      kill "$pid" 2>/dev/null || true
    done

    # Fallback: stop any remaining bazel-run processes
    pkill -f "bazel.*run.*osmo" || true

    # Remove docker containers labelled for this PR
    docker ps -a --filter "label=osmo-pr=${PR_NUMBER}" --format "{{.ID}}" | xargs -r docker rm -f || true

    # Stop any lingering postgres/redis/localstack containers.
    # NOTE(review): these filters match by container name only, so they also
    # stop containers that do not belong to this PR — confirm that the runner
    # is dedicated to a single environment.
    for name in postgres redis localstack; do
      docker ps -a --filter "name=${name}" --format "{{.ID}}" | xargs -r docker stop || true
    done

    # Remove log directory (this also removes the PID files)
    rm -rf "${STATE_DIR}"

    echo "✅ Cleanup completed"

# Post a PR comment saying the environment was torn down and why (PR closed
# versus label removed).
- name: Comment on PR about cleanup
  uses: actions/github-script@v7
  with:
    script: |
      const { action, pull_request: pr } = context.payload;
      const reason = action === 'closed' ? 'PR closed' : 'Label removed';

      const body =
        '## 🧹 Ephemeral Environment Torn Down\n\n' +
        `${reason} - environment cleaned up.`;

      // context.repo supplies { owner, repo } for the current repository.
      await github.rest.issues.createComment({
        ...context.repo,
        issue_number: pr.number,
        body,
      });
Loading