From a0349a58e97c19075d88b6503fc2c22dc0eb3816 Mon Sep 17 00:00:00 2001
From: Ethan Look-Potts
Date: Mon, 17 Nov 2025 20:56:42 -0500
Subject: [PATCH] WIP

---
 .github/workflows/ephemeral-env.yaml | 359 +++++++++++++++++++++++++++
 1 file changed, 359 insertions(+)
 create mode 100644 .github/workflows/ephemeral-env.yaml

diff --git a/.github/workflows/ephemeral-env.yaml b/.github/workflows/ephemeral-env.yaml
new file mode 100644
index 00000000..e136fc7e
--- /dev/null
+++ b/.github/workflows/ephemeral-env.yaml
@@ -0,0 +1,359 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+name: Ephemeral Environment
+
+# Deploys (or redeploys) an OSMO environment for a pull request labeled
+# `deploy-env`, and tears it down when the label is removed or the PR closes.
+on:
+  pull_request:
+    types: [labeled, unlabeled, closed, synchronize]
+
+# NOTE(review): every pull_request event of the types above starts a run in
+# this group - including label events for unrelated labels - and cancels any
+# run still in progress for the same PR. Confirm that is acceptable.
+concurrency:
+  group: ephemeral-env-pr-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  #######################
+  # Deploy Environment  #
+  #######################
+  deploy:
+    # Runs when `deploy-env` is added, or when new commits land on a PR that
+    # already has it; only for open, non-fork PRs targeting main.
+    if: |
+      (
+        (github.event.action == 'labeled' && github.event.label.name == 'deploy-env') ||
+        (github.event.action == 'synchronize' && contains(github.event.pull_request.labels.*.name, 'deploy-env'))
+      ) &&
+      github.event.pull_request.state == 'open' &&
+      github.event.pull_request.head.repo.fork == false &&
+      github.event.pull_request.base.ref == 'main'
+    runs-on: self-hosted
+    environment:
+      name: ephemeral-env
+      url: http://${{ steps.get-ip.outputs.host_ip }}:8080
+    permissions:
+      pull-requests: write
+      contents: read
+    steps:
+      - name: Checkout PR branch
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+          lfs: true
+
+      - name: Setup Bazel
+        uses: bazel-contrib/setup-bazel@4fd964a13a440a8aeb0be47350db2fc640f19ca8
+        with:
+          bazelisk-cache: true
+          bazelisk-version: 1.27.0
+
+      - name: Cleanup existing environment (if any)
+        run: |
+          export PR_NUMBER=${{ github.event.pull_request.number }}
+          echo "Checking for existing environment for PR #${PR_NUMBER}..."
+
+          # Stop services using saved PIDs. Children are signalled before
+          # their parent: once the parent exits they are re-parented and
+          # `pkill -P` no longer matches them.
+          for name in service backend; do
+            pid_file="/tmp/osmo-pr-${PR_NUMBER}/${name}.pid"
+            if [ -s "$pid_file" ]; then
+              pid=$(cat "$pid_file")
+              echo "Stopping existing ${name} process (PID: $pid)..."
+              pkill -P "$pid" 2>/dev/null || true
+              kill "$pid" 2>/dev/null || true
+            fi
+          done
+
+          # Fallback: Stop any remaining bazel-run processes
+          pkill -f "bazel.*run.*osmo" || true
+
+          # Give processes time to stop
+          sleep 5
+
+          echo "✅ Cleanup completed"
+
+      - name: Get Host IP
+        id: get-ip
+        run: |
+          # First IPv4 address in 10.0.0.0/8 reported by ifconfig. The match
+          # is anchored with an escaped dot so that e.g. 192.168.10.1 is not
+          # selected; an optional "addr:" prefix is stripped first.
+          HOST_IP=$(ifconfig | awk '$1 == "inet" { sub(/^addr:/, "", $2); if ($2 ~ /^10\./) print $2 }' | head -1)
+          if [ -z "$HOST_IP" ]; then
+            echo "❌ No 10.x.x.x address found on this host" >&2
+            exit 1
+          fi
+          echo "host_ip=$HOST_IP" >> "$GITHUB_OUTPUT"
+          echo "Host IP: $HOST_IP"
+
+          # Create/recreate log directory for this PR
+          PR_NUMBER=${{ github.event.pull_request.number }}
+          mkdir -p "/tmp/osmo-pr-${PR_NUMBER}"
+          echo "Created log directory: /tmp/osmo-pr-${PR_NUMBER}"
+
+      - name: Start OSMO Services (Background)
+        run: |
+          export HOST_IP=${{ steps.get-ip.outputs.host_ip }}
+          export PR_NUMBER=${{ github.event.pull_request.number }}
+
+          echo "Starting OSMO services in background..."
+          # Start services in background with nohup. RUNNER_TRACKING_ID is
+          # cleared so the runner's end-of-job orphan-process cleanup does
+          # not terminate the environment.
+          RUNNER_TRACKING_ID="" nohup bazel run @osmo_workspace//run:start_service -- --mode bazel --log-level=debug \
+            > /tmp/osmo-pr-${PR_NUMBER}/service.log 2>&1 &
+
+          # Save the PID
+          echo $! > /tmp/osmo-pr-${PR_NUMBER}/service.pid
+          echo "Started OSMO services with PID: $(cat /tmp/osmo-pr-${PR_NUMBER}/service.pid)"
+          echo "Logs: /tmp/osmo-pr-${PR_NUMBER}/service.log"
+
+      - name: Wait for OSMO Services to be ready
+        run: |
+          HOST_IP=${{ steps.get-ip.outputs.host_ip }}
+          PR_NUMBER=${{ github.event.pull_request.number }}
+
+          echo "Waiting for OSMO services to be ready at http://$HOST_IP:8080..."
+          max_attempts=60
+          attempt=0
+
+          while [ $attempt -lt $max_attempts ]; do
+            # Check if the process is still running
+            if [ -f "/tmp/osmo-pr-${PR_NUMBER}/service.pid" ]; then
+              SERVICE_PID=$(cat /tmp/osmo-pr-${PR_NUMBER}/service.pid)
+              if ! kill -0 $SERVICE_PID 2>/dev/null; then
+                echo "❌ Service process died unexpectedly!"
+                echo "Last 50 lines of service log:"
+                tail -50 /tmp/osmo-pr-${PR_NUMBER}/service.log
+                exit 1
+              fi
+            fi
+
+            # Check if service is responding. curl itself prints 000 when no
+            # response was received; --max-time bounds a hung connection.
+            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 http://$HOST_IP:8080 2>/dev/null || true)
+            if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "302" ]; then
+              echo "✅ OSMO services are ready! (HTTP $HTTP_CODE)"
+              exit 0
+            fi
+
+            attempt=$((attempt + 1))
+            echo "Attempt $attempt/$max_attempts - HTTP $HTTP_CODE - waiting..."
+            sleep 10
+          done
+
+          echo "❌ OSMO services did not become ready in time"
+          echo "Last 50 lines of service log:"
+          tail -50 /tmp/osmo-pr-${PR_NUMBER}/service.log
+          exit 1
+
+      - name: Start OSMO Backend (Background)
+        run: |
+          export HOST_IP=${{ steps.get-ip.outputs.host_ip }}
+          export PR_NUMBER=${{ github.event.pull_request.number }}
+
+          echo "Starting OSMO backend in background..."
+          # Start backend in background with nohup; RUNNER_TRACKING_ID is
+          # cleared for the same reason as for the services above.
+          RUNNER_TRACKING_ID="" nohup bazel run @osmo_workspace//run:start_backend -- --mode bazel --log-level=debug \
+            > /tmp/osmo-pr-${PR_NUMBER}/backend.log 2>&1 &
+
+          # Save the PID
+          echo $! > /tmp/osmo-pr-${PR_NUMBER}/backend.pid
+          echo "Started OSMO backend with PID: $(cat /tmp/osmo-pr-${PR_NUMBER}/backend.pid)"
+          echo "Logs: /tmp/osmo-pr-${PR_NUMBER}/backend.log"
+
+          # Give backend a moment to start
+          sleep 10
+
+          # Check if backend process is running
+          BACKEND_PID=$(cat /tmp/osmo-pr-${PR_NUMBER}/backend.pid)
+          if ! kill -0 $BACKEND_PID 2>/dev/null; then
+            echo "❌ Backend process died unexpectedly!"
+            echo "Last 50 lines of backend log:"
+            tail -50 /tmp/osmo-pr-${PR_NUMBER}/backend.log
+            exit 1
+          fi
+
+          echo "✅ Backend process is running"
+
+      - name: Update Configuration
+        run: |
+          export HOST_IP=${{ steps.get-ip.outputs.host_ip }}
+          bazel run @osmo_workspace//run:update_configs -- --mode bazel
+
+      - name: Verify processes are running
+        run: |
+          PR_NUMBER=${{ github.event.pull_request.number }}
+          echo "Checking process status..."
+
+          SERVICE_PID=$(cat /tmp/osmo-pr-${PR_NUMBER}/service.pid)
+          BACKEND_PID=$(cat /tmp/osmo-pr-${PR_NUMBER}/backend.pid)
+
+          echo "Service PID: $SERVICE_PID"
+          ps -p $SERVICE_PID -o pid,ppid,comm,args || echo "Service process not found!"
+
+          echo "Backend PID: $BACKEND_PID"
+          ps -p $BACKEND_PID -o pid,ppid,comm,args || echo "Backend process not found!"
+
+      - name: Test environment with hello_world workflow
+        run: |
+          export HOST_IP=${{ steps.get-ip.outputs.host_ip }}
+          # Login to OSMO
+          bazel run @osmo_workspace//src/cli -- login http://$HOST_IP:8000 --method=dev --username=testuser
+
+          # Submit test workflow (if available)
+          if [ -f "workflows/basics/hello_world/hello_world.yaml" ]; then
+            echo "Submitting test workflow..."
+            bazel run @osmo_workspace//src/cli -- workflow submit workflows/basics/hello_world/hello_world.yaml
+          else
+            echo "Test workflow not found, skipping validation"
+          fi
+
+      - name: Comment on PR with environment details
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const hostIp = '${{ steps.get-ip.outputs.host_ip }}';
+            const prNumber = context.payload.pull_request.number;
+            const envUrl = `http://${hostIp}:8080`;
+            const action = context.payload.action;
+            const isRedeployment = action === 'synchronize';
+            const commitSha = context.payload.pull_request.head.sha.substring(0, 7);
+
+            const emoji = isRedeployment ? '🔄' : '🚀';
+            const title = isRedeployment ? 'Ephemeral Environment Redeployed' : 'Ephemeral Environment Ready';
+            const note = isRedeployment ? `Updated to commit \`${commitSha}\`` : '';
+
+            const comment = `## ${emoji} ${title}
+
+            **Access**: ${envUrl}
+            ${note}
+
+            **Login**:
+            \`\`\`bash
+            bazel run @osmo_workspace//src/cli -- login http://${hostIp}:8000 --method=dev --username=testuser
+            \`\`\`
+
+            Remove \`deploy-env\` label to tear down.`;
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              body: comment
+            });
+
+      - name: Handle deployment failure
+        if: failure()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const prNumber = context.payload.pull_request.number;
+            const runUrl = `${context.payload.repository.html_url}/actions/runs/${context.runId}`;
+
+            const comment = `## ❌ Ephemeral Environment Deployment Failed
+
+            Check [workflow logs](${runUrl}) for details.`;
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              body: comment
+            });
+
+  #######################
+  # Cleanup Environment #
+  #######################
+  cleanup:
+    # Runs when `deploy-env` is removed, or when a PR that still carries it
+    # is closed. PRs that never had an environment are skipped so closing
+    # them does not stop anything on the shared runner.
+    if: |
+      (
+        (github.event.action == 'unlabeled' && github.event.label.name == 'deploy-env') ||
+        (github.event.action == 'closed' && contains(github.event.pull_request.labels.*.name, 'deploy-env'))
+      ) &&
+      github.event.pull_request.head.repo.fork == false &&
+      github.event.pull_request.base.ref == 'main'
+    runs-on: self-hosted
+    permissions:
+      pull-requests: write
+      contents: read
+    steps:
+      - name: Checkout PR branch
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+          lfs: true
+
+      - name: Stop OSMO services
+        run: |
+          export PR_NUMBER=${{ github.event.pull_request.number }}
+          echo "Stopping OSMO services for PR #${PR_NUMBER}..."
+
+          # Stop services using saved PIDs. The process tree (children) is
+          # signalled before the parent: once the parent exits its children
+          # are re-parented and `pkill -P` no longer matches them.
+          for name in service backend; do
+            pid_file="/tmp/osmo-pr-${PR_NUMBER}/${name}.pid"
+            if [ -s "$pid_file" ]; then
+              pid=$(cat "$pid_file")
+              echo "Stopping ${name} process (PID: $pid)..."
+              pkill -P "$pid" 2>/dev/null || true
+              kill "$pid" 2>/dev/null || true
+            fi
+          done
+
+          # Fallback: Stop any remaining bazel-run processes
+          pkill -f "bazel.*run.*osmo" || true
+
+          # Stop docker containers created by the environment
+          docker ps -a --filter "label=osmo-pr=${PR_NUMBER}" --format "{{.ID}}" | xargs -r docker rm -f || true
+
+          # Clean up any lingering postgres/redis/localstack containers
+          docker ps -a --filter "name=postgres" --format "{{.ID}}" | xargs -r docker stop || true
+          docker ps -a --filter "name=redis" --format "{{.ID}}" | xargs -r docker stop || true
+          docker ps -a --filter "name=localstack" --format "{{.ID}}" | xargs -r docker stop || true
+
+          # Remove log directory
+          rm -rf /tmp/osmo-pr-${PR_NUMBER}
+
+          echo "✅ Cleanup completed"
+
+      - name: Comment on PR about cleanup
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const prNumber = context.payload.pull_request.number;
+            const reason = context.payload.action === 'closed' ? 'PR closed' : 'Label removed';
+
+            const comment = `## 🧹 Ephemeral Environment Torn Down
+
+            ${reason} - environment cleaned up.`;
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              body: comment
+            });