From 67370de095fe010ce27a246ab14d18516cfa0af9 Mon Sep 17 00:00:00 2001 From: matttrach Date: Tue, 21 Oct 2025 14:01:05 -0500 Subject: [PATCH] fix: create a slow mode for testing Signed-off-by: matttrach --- .github/workflows/manual.yaml | 48 ------------------ .github/workflows/release.yaml | 14 ++++-- modules/deploy/create.sh.tpl | 4 +- modules/deploy/destroy.sh.tpl | 4 +- run_tests.sh | 89 ++++++++++++++++++++++++++++++---- 5 files changed, 95 insertions(+), 64 deletions(-) delete mode 100644 .github/workflows/manual.yaml diff --git a/.github/workflows/manual.yaml b/.github/workflows/manual.yaml deleted file mode 100644 index 0be99fe..0000000 --- a/.github/workflows/manual.yaml +++ /dev/null @@ -1,48 +0,0 @@ -name: manual - -on: workflow_dispatch - -env: - AWS_REGION: us-west-2 - AWS_ROLE: arn:aws:iam::270074865685:role/terraform-module-ci-test - GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} - ACME_SERVER_URL: https://acme-v02.api.letsencrypt.org/directory - -permissions: write-all - -jobs: - test_TestOneBasic: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - with: - token: ${{secrets.GITHUB_TOKEN}} - fetch-depth: 0 - - id: aws-creds - uses: aws-actions/configure-aws-credentials@v5 - with: - role-to-assume: ${{env.AWS_ROLE}} - role-session-name: ${{github.run_id}} - aws-region: ${{env.AWS_REGION}} - role-duration-seconds: 7200 # 2 hours - output-credentials: true - - name: install-nix - run: | - curl -L https://nixos.org/nix/install | sh - source /home/runner/.nix-profile/etc/profile.d/nix.sh - nix --version - which nix - - name: run_tests - shell: '/home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep IDENTIFIER --keep GITHUB_TOKEN --keep GITHUB_OWNER --keep ZONE --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --command bash -e {0}' - env: - AWS_ACCESS_KEY_ID: ${{ steps.aws-creds.outputs.aws-access-key-id }} - AWS_SECRET_ACCESS_KEY: ${{ steps.aws-creds.outputs.aws-secret-access-key }} - AWS_SESSION_TOKEN: ${{ steps.aws-creds.outputs.aws-session-token }} - GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} - GITHUB_OWNER: rancher - IDENTIFIER: ${{github.run_id}} - ZONE: ${{secrets.ZONE}} - ACME_SERVER_URL: https://acme-v02.api.letsencrypt.org/directory - RANCHER_INSECURE: false - run: | - ./run_tests.sh -t TestOneBasic diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 050d8e4..f7d1603 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -73,6 +73,13 @@ jobs: repo: "${{ github.event.repository.name }}", body: "Please make sure e2e tests pass before merging this PR! \n ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" }) + + test: + needs: + - release + if: needs.release.outputs.release_pr + runs-on: ubuntu-latest + steps: - uses: actions/checkout@v5 with: token: ${{secrets.GITHUB_TOKEN}} @@ -83,7 +90,7 @@ jobs: role-to-assume: ${{env.AWS_ROLE}} role-session-name: ${{github.run_id}} aws-region: ${{env.AWS_REGION}} - role-duration-seconds: 14400 # 4 hours + role-duration-seconds: 28800 # 8 hours output-credentials: true - name: install-nix run: | @@ -106,12 +113,12 @@ jobs: ACME_SERVER_URL: https://acme-v02.api.letsencrypt.org/directory RANCHER_INSECURE: false run: | - # should take around 4 hours - ./run_tests.sh + ./run_tests.sh -s cleanup: needs: - release + - test if: always() && needs.release.outputs.release_pr runs-on: ubuntu-latest steps: @@ -147,6 +154,7 @@ jobs: report: needs: - release + - test - cleanup if: success() && needs.release.outputs.release_pr #Ensure the test jobs succeeded, and that a release PR was created. runs-on: ubuntu-latest diff --git a/modules/deploy/create.sh.tpl b/modules/deploy/create.sh.tpl index 713109a..ebb3a82 100644 --- a/modules/deploy/create.sh.tpl +++ b/modules/deploy/create.sh.tpl @@ -18,7 +18,7 @@ E1=0 while [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; do A=0 while [ $E -gt 0 ] && [ $A -lt $MAX ]; do - timeout -k 1m ${timeout} terraform apply -var-file="${deploy_path}/inputs.tfvars" -auto-approve -state="${deploy_path}/tfstate" + timeout -k 1m ${timeout} terraform apply -var-file="${deploy_path}/inputs.tfvars" -no-color -auto-approve -state="${deploy_path}/tfstate" E=$? if [ $E -eq 124 ]; then echo "Apply timed out after ${timeout}"; fi A=$((A+1)) @@ -27,7 +27,7 @@ while [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; do if [ $E -gt 0 ] && [ $ATTEMPTS != $((MAX-1)) ]; then A1=0 while [ $E1 -gt 0 ] && [ $A1 -lt $MAX ]; do - timeout -k 1m ${timeout} terraform destroy -var-file="${deploy_path}/inputs.tfvars" -auto-approve -state="${deploy_path}/tfstate" + timeout -k 1m ${timeout} terraform destroy -var-file="${deploy_path}/inputs.tfvars" -no-color -auto-approve -state="${deploy_path}/tfstate" E1=$? if [ $E1 -eq 124 ]; then echo "Apply timed out after ${timeout}"; fi A1=$((A1+1)) diff --git a/modules/deploy/destroy.sh.tpl b/modules/deploy/destroy.sh.tpl index e7296cf..c3ff2fd 100644 --- a/modules/deploy/destroy.sh.tpl +++ b/modules/deploy/destroy.sh.tpl @@ -8,8 +8,8 @@ whoami TF_CLI_ARGS_init="" TF_CLI_ARGS_apply="" if [ -z "${skip_destroy}" ]; then - timeout -k 1m ${timeout} terraform init -upgrade -reconfigure - timeout -k 1m ${timeout} terraform destroy -var-file="${deploy_path}/inputs.tfvars" -auto-approve -state="${deploy_path}/tfstate" || true + timeout -k 1m ${timeout} terraform init -upgrade -reconfigure -no-color + timeout -k 1m ${timeout} terraform destroy -var-file="${deploy_path}/inputs.tfvars" -no-color -auto-approve -state="${deploy_path}/tfstate" || true else echo "Not destroying deployed module, it will no longer be managed here." fi diff --git a/run_tests.sh b/run_tests.sh index 003ef07..4cde16d 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -4,23 +4,64 @@ rerun_failed=false specific_test="" specific_package="" cleanup_id="" +slow_mode=false -while getopts ":r:t:p:c:" opt; do +while getopts ":rst:p:c:" opt; do case $opt in r) rerun_failed=true ;; t) specific_test="$OPTARG" ;; p) specific_package="$OPTARG" ;; c) cleanup_id="$OPTARG" ;; + s) slow_mode=true ;; \?) cat <&2 && exit 1 ;; Invalid option -$OPTARG, valid options are -r to re-run failed tests - -t to specify a specific test (eg. TestBase) - -p to specify a specific test package (eg. base) + -s to run tests in slow mode (one at a time to avoid AWS rate limiting) -c to run clean up only with the given id (eg. abc123) + -t to specify a specific test (eg. TestBase) + -p to specify a specific test package (eg. one) +Only one of -c, -t, or -p can be used at a time. EOT esac done +if [ $slow_mode == true ]; then + echo "Running in slow mode: tests will be run one at a time to avoid AWS rate limiting." +elif [ $slow_mode == false ]; then + echo "Running in normal mode: tests will be run in parallel." +fi +if [ $rerun_failed == true ]; then + echo "Rerun failed tests is enabled." +elif [ $rerun_failed == false ]; then + echo "Rerun failed tests is disabled." +fi +if [ -n "$specific_test" ]; then + echo "Specific test to run: $specific_test" +else + echo "No specific test to run." +fi +if [ -n "$specific_package" ]; then + echo "Specific package to run: $specific_package" +else + echo "No specific package to run." +fi +if [ -n "$cleanup_id" ]; then + echo "Cleanup only mode enabled with id: $cleanup_id" +fi +if [ -n "$cleanup_id" ] && { [ -n "$specific_test" ] || [ -n "$specific_package" ]; }; then + echo "Error: Only one of -c, -t, or -p can be used at a time." >&2 + exit 1 +fi +if [ -n "$specific_test" ] && { [ -n "$specific_package" ] || [ -n "$cleanup_id" ]; }; then + echo "Error: Only one of -c, -t, or -p can be used at a time." >&2 + exit 1 +fi +if [ -n "$specific_package" ] && { [ -n "$specific_test" ] || [ -n "$cleanup_id" ]; }; then + echo "Error: Only one of -c, -t, or -p can be used at a time." >&2 + exit 1 +fi + + # shellcheck disable=SC2143 if [ -n "$cleanup_id" ]; then export IDENTIFIER="$cleanup_id" @@ -30,6 +71,7 @@ REPO_ROOT="$(git rev-parse --show-toplevel)" run_tests() { local rerun=$1 + local slow_mode=$2 REPO_ROOT="$(git rev-parse --show-toplevel)" cd "$REPO_ROOT" || exit 1 @@ -85,8 +127,37 @@ EOF else package_pattern="..." fi - # We need both -p and -parallel, as -p sets the number of packages to test in parallel, and -parallel sets the number of tests to run in parallel. - # By setting both to 1, we ensure that tests are run sequentially, which can help avoid AWS rate limiting issues. I does increase the runtime significantly though. + + # We need both -p and -parallel, as -p sets the number of packages to test in parallel, + # and -parallel sets the number of tests to run in parallel. + # By setting both to 1, we ensure that tests are run sequentially, which can help avoid AWS rate limiting issues. + # It does increase the runtime significantly though. + local parallel_packages="" + local parallel_tests="" + if [ "$slow_mode" = true ]; then + echo "Running in slow mode..." + parallel_packages="-p=1" + parallel_tests="-parallel=1" + fi + + CMD=$(cat <