diff --git a/.github/actions/download-artifact/action.yml b/.github/actions/download-artifact/action.yml index f98984ff..5a2fd40b 100644 --- a/.github/actions/download-artifact/action.yml +++ b/.github/actions/download-artifact/action.yml @@ -35,9 +35,13 @@ runs: echo "downloading artifacts from https://github.com/paritytech/ahm-dryrun/actions/runs/${RUN_ID}" if [[ $NAME_PATTERN == "" ]]; then - gh run download $RUN_ID -p "${NAME_PATTERN}" -D $TMP_DIR + gh run download $RUN_ID -D $TMP_DIR else - gh run download $RUN_ID -p "${NAME_PATTERN}" -D $TMP_DIR + PATTERNS="" + for p in $(echo $NAME_PATTERN| tr "," "\n");do + PATTERNS="$PATTERNS -p $p " + done + gh run download $RUN_ID $PATTERNS -D $TMP_DIR fi; ARTIFACTS_DIR=$(ls -t $TMP_DIR|head -1) diff --git a/.github/actions/download-doppelganger-binaries/action.yml b/.github/actions/download-doppelganger-binaries/action.yml index daa2bf3a..51953680 100644 --- a/.github/actions/download-doppelganger-binaries/action.yml +++ b/.github/actions/download-doppelganger-binaries/action.yml @@ -26,7 +26,7 @@ runs: VERSION_TO_DOWNLOAD=$(curl https://api.github.com/repos/paritytech/doppelganger-wrapper/releases/latest | jq -r .tag_name) fi; for bin in doppelganger doppelganger-parachain polkadot-execute-worker polkadot-prepare-worker; do - echo "downloading $bin"; + echo "downloading $bin from https://github.com/paritytech/doppelganger-wrapper/releases/download/$VERSION_TO_DOWNLOAD/$bin"; curl -L -o $DESTINATION_PATH/$bin https://github.com/paritytech/doppelganger-wrapper/releases/download/$VERSION_TO_DOWNLOAD/$bin; chmod 755 $DESTINATION_PATH/$bin; done; diff --git a/.github/actions/download-post-migration-db/action.yml b/.github/actions/download-post-migration-db/action.yml index c4e403d1..50523e57 100644 --- a/.github/actions/download-post-migration-db/action.yml +++ b/.github/actions/download-post-migration-db/action.yml @@ -21,12 +21,31 @@ runs: run: | set -euo pipefail TMP_DIR="$(mktemp -d)" - - curl -sSLf -H "Authorization: Bearer ${GITHUB_TOKEN}" -o "${TMP_DIR}/artifact.zip" "${ARTIFACT_URL}" - + + FILENAME=$(basename ${ARTIFACT_URL}) + + IS_GITHUB=$(echo ${ARTIFACT_URL}| grep -o github || true) + IS_ZIP=$(echo ${ARTIFACT_URL}| grep -o zip || true) + if [[ $IS_GITHUB == "github" ]];then + curl -sSLf -H "Authorization: Bearer ${GITHUB_TOKEN}" -o "${TMP_DIR}/${FILENAME}" "${ARTIFACT_URL}" + else + curl -sSLf -o "${TMP_DIR}/${FILENAME}" "${ARTIFACT_URL}" + fi; + rm -rf "${DESTINATION_PATH}" mkdir -p "${DESTINATION_PATH}" - unzip -q "${TMP_DIR}/artifact.zip" -d "${DESTINATION_PATH}" + + if [[ $IS_ZIP == "zip" ]];then + unzip -q "${TMP_DIR}/${FILENAME}" -d "${DESTINATION_PATH}" + else + tar -xvf "${TMP_DIR}/${FILENAME}" -C "${DESTINATION_PATH}" + fi; + rm -rf "${TMP_DIR}" - - ls "${DESTINATION_PATH}" \ No newline at end of file + echo "ls dst path" + ls "${DESTINATION_PATH}" + mv ${DESTINATION_PATH}/pahm/spawn ${DESTINATION_PATH}/ + mv ${DESTINATION_PATH}/pahm/ready.json ${DESTINATION_PATH}/ + mv ${DESTINATION_PATH}/pahm/ports.json ${DESTINATION_PATH}/ + echo "ls dst path" + ls "${DESTINATION_PATH}" diff --git a/.github/actions/wait-zb-network-ready/action.yml b/.github/actions/wait-zb-network-ready/action.yml index 29ece565..3b3cc916 100644 --- a/.github/actions/wait-zb-network-ready/action.yml +++ b/.github/actions/wait-zb-network-ready/action.yml @@ -17,7 +17,7 @@ runs: shell: bash run: | echo "Waiting for 'network is up and running' message..." - timeout 300 bash -c ' + timeout 900 bash -c ' while ! 
grep -q "network is up and running" $LOG_FILE 2>/dev/null; do echo "Still waiting for network... last log line:" tail -1 $LOG_FILE diff --git a/.github/workflows/migration-schedule-check.yml b/.github/workflows/migration-schedule-check.yml index 1d689706..961258e9 100644 --- a/.github/workflows/migration-schedule-check.yml +++ b/.github/workflows/migration-schedule-check.yml @@ -23,7 +23,7 @@ permissions: {} jobs: get-run-id: runs-on: ubuntu-latest - if: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.run-id == '') || 'true' }} + if: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.run-id == '') }} outputs: RUN_ID: ${{ steps.get_run_id.outputs.RUN_ID }} steps: diff --git a/.github/workflows/rust-tests-common.yml b/.github/workflows/rust-tests-common.yml index 4a42e413..f49b871c 100644 --- a/.github/workflows/rust-tests-common.yml +++ b/.github/workflows/rust-tests-common.yml @@ -56,38 +56,28 @@ jobs: echo "downloading artifacts from https://github.com/paritytech/ahm-dryrun/actions/runs/${RUN_ID}" gh run download $RUN_ID -p "${NETWORK}-try-runtime*" -D $SNAPS_PATH ls -ltr $SNAPS_PATH - ARTIFACTS_DIR_PRE=$(ls -t $SNAPS_PATH|grep try-runtime-snaps-pre) - ARTIFACTS_DIR_POST=$(ls -t $SNAPS_PATH|grep try-runtime-snaps-post) - echo "ARTIFACTS_DIR_PRE=$SNAPS_PATH/$ARTIFACTS_DIR_PRE" >> $GITHUB_OUTPUT - echo "ARTIFACTS_DIR_POST=$SNAPS_PATH/$ARTIFACTS_DIR_POST" >> $GITHUB_OUTPUT + # Single artifact contains all 4 snapshots + ARTIFACTS_DIR=$(ls -t $SNAPS_PATH|grep try-runtime-snaps|grep -v pre|grep -v post|head -1) + echo "ARTIFACTS_DIR=$SNAPS_PATH/$ARTIFACTS_DIR" >> $GITHUB_OUTPUT - name: verify_and_move_snaps shell: bash env: NETWORK: ${{ inputs.network }} run: | - for i in rc-pre.snap ah-pre.snap;do - if [ ! -f "${{ steps.download_artifacts.outputs.ARTIFACTS_DIR_PRE }}/${NETWORK}-${i}" ]; then + # All 4 snapshots are in a single artifact directory + for i in rc-pre.snap ah-pre.snap rc-post.snap ah-post.snap;do + if [ ! -f "${{ steps.download_artifacts.outputs.ARTIFACTS_DIR }}/${NETWORK}-${i}" ]; then echo "ERROR: Could not find ${NETWORK}-${i}" exit 1 else - mv "${{ steps.download_artifacts.outputs.ARTIFACTS_DIR_PRE }}/${NETWORK}-${i}" $SNAPS_PATH - fi - done; - - for i in rc-post.snap ah-post.snap;do - if [ ! 
-f "${{ steps.download_artifacts.outputs.ARTIFACTS_DIR_POST }}/${NETWORK}-${i}" ]; then - echo "ERROR: Could not find ${NETWORK}-${i}" - exit 1 - else - mv "${{ steps.download_artifacts.outputs.ARTIFACTS_DIR_POST }}/${NETWORK}-${i}" $SNAPS_PATH + mv "${{ steps.download_artifacts.outputs.ARTIFACTS_DIR }}/${NETWORK}-${i}" $SNAPS_PATH fi done; echo "final files in ${SNAPS_PATH}" ls -ltr $SNAPS_PATH - - name: run_rust_tests shell: bash env: diff --git a/.github/workflows/zombie-bite-common.yml b/.github/workflows/zombie-bite-common.yml index abacbffe..8420d836 100644 --- a/.github/workflows/zombie-bite-common.yml +++ b/.github/workflows/zombie-bite-common.yml @@ -25,6 +25,22 @@ on: required: false type: string default: "" + doppelganger_version: + description: "Tag of the release to use" + required: false + type: string + default: "latest" + pre_db_run_id: + description: "start the process from a pre-db" + required: false + type: string + default: "" + post_db_url: + description: "Url for download a post-db and start from there" + required: false + type: string + default: "" + env: ZOMBIE_BITE_BASE_PATH: "/tmp/ci" @@ -34,10 +50,17 @@ jobs: run_zombie_bite: runs-on: ${{ inputs.runner }} container: - image: docker.io/paritytech/ci-unified:bullseye-1.88.0-2025-06-27-v202507221446 - timeout-minutes: ${{ inputs.runner == 'kusama' && 180 || 720 }} + image: ${{ fromJSON(contains(inputs.runner, 'parity') && '"docker.io/paritytech/ci-unified:bullseye-1.88.0-2025-06-27-v202507221446"' || 'null' )}} + options: ${{ fromJSON(contains(inputs.runner, 'parity') && '"--privileged"' || '"--name pahm"' )}} + timeout-minutes: ${{ inputs.network == 'kusama' && 180 || 720 }} steps: + - name: Free Disk Space (Ubuntu) + if: ${{ runner.environment == 'github-hosted' }} + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + with: + tool-cache: false + - uses: actions/checkout@v4 with: submodules: recursive @@ -53,6 +76,14 @@ jobs: uses: ./.github/actions/download-doppelganger-binaries with: destination-path: ${{ env.AHM_BINS }} + tag-version: ${{ inputs.doppelganger_version }} + + - name: install_deps_if_ubuntu + if: ${{ contains(inputs.runner, 'ubuntu') }} + shell: bash + run: | + sudo apt-get update + sudo apt-get install protobuf-compiler - name: install_zombie_bite shell: bash @@ -62,10 +93,13 @@ jobs: - name: install_try_runtime_cli shell: bash run: | - just install-try-runtime + curl -L -o $AHM_BINS/try-runtime https://github.com/paritytech/try-runtime-cli/releases/download/v0.8.0/try-runtime-x86_64-unknown-linux-musl; + chmod 755 $AHM_BINS/try-runtime; - name: build_runtimes shell: bash + # only runs if we don't have a pre-db or post-db + if: ${{ inputs.pre_db_run_id == '' && inputs.post_db_url == '' }} env: RUSTFLAGS: "-A warnings" NETWORK: ${{ inputs.network }} @@ -95,7 +129,10 @@ jobs: - name: zombie_bite_step_bite id: zombie_bite_step_bite + # only runs if we don't have a pre-db or post-db + if: ${{ inputs.pre_db_run_id == '' && inputs.post_db_url == '' }} shell: bash + timeout-minutes: 45 env: NETWORK: ${{ inputs.network }} SUDO: ${{ inputs.sudo-key }} @@ -141,12 +178,6 @@ jobs: source .env set +a - # @x3c41a is this still needed? 
- # echo "::group::find-rc-block-bite" - # RC_BITE_BLOCK=$(just find-rc-block-bite $NETWORK) - # echo "Found RC block to bite: $RC_BITE_BLOCK" - # echo "::endgroup::" - export RUST_LOG_RC=$ZOMBIE_BITE_RUST_LOG export RUST_LOG_COL=$ZOMBIE_BITE_RUST_LOG_COL @@ -160,6 +191,8 @@ jobs: continue-on-error: true - name: zombie_bite_upload_step + # only runs if we don't have a pre-db or post-db + if: ${{ inputs.pre_db_run_id == '' && inputs.post_db_url == '' }} uses: actions/upload-artifact@v4 with: name: ${{ inputs.network }}-pre-migration-db-${{ github.sha }}${{ inputs.args }} @@ -168,31 +201,95 @@ jobs: ${{ env.ZOMBIE_BITE_BASE_PATH }}/ports.json ${{ env.ZOMBIE_BITE_BASE_PATH }}/ready.json + - name: download_pre_migration_db + # only runs if we have a pre-db + if: ${{ inputs.pre_db_run_id != '' }} + uses: ./.github/actions/download-artifact + env: + RUN_ID: ${{ inputs.pre_db_run_id }} + GH_TOKEN: ${{ github.token }} + NETWORK: ${{ inputs.network }} + with: + gh-token: ${{ env.GH_TOKEN }} + destination-path: ${{ env.ZOMBIE_BITE_BASE_PATH }} + run-id: ${{ env.RUN_ID }} + name-pattern: "${{ env.NETWORK }}-pre-migration-db-*,${{env.NETWORK}}-zb-step-bite-*" + - name: checkpoint_bite + # only runs if we don't have a post-db + if: ${{ inputs.post_db_url == '' }} shell: bash + env: + RUN_ID: ${{ inputs.pre_db_run_id }} + RUNNER: ${{ inputs.runner }} run: | echo "::group::debug" - ls /tmp/* ls ${{ env.ZOMBIE_BITE_BASE_PATH }}/bite - ls ${{ env.ZOMBIE_BITE_BASE_PATH }}/bite-debug + if [[ $RUN_ID == "" ]];then + ls ${{ env.ZOMBIE_BITE_BASE_PATH }}/bite-debug + fi; + if [[ $RUNNER =~ "parity" ]];then + ls /tmp/* + fi; echo "::endgroup::" - name: run_zombie_bite_spawn + # only runs if we don't have a post-db + if: ${{ inputs.post_db_url == '' }} shell: bash run: | export PATH=${AHM_BINS}:$PATH # remove previous nohup.out rm nohup.out || true - nohup zombie-bite spawn -d $ZOMBIE_BITE_BASE_PATH > nohup.out 2>&1 & + nohup zombie-bite spawn -d $ZOMBIE_BITE_BASE_PATH > $ZOMBIE_BITE_BASE_PATH/nohup.out 2>&1 & - name: wait_for_network + # only runs if we don't have a post-db + if: ${{ inputs.post_db_url == '' }} + id: wait_for_network uses: ./.github/actions/wait-zb-network-ready + with: + log-file: ${{ env.ZOMBIE_BITE_BASE_PATH }}/nohup.out + continue-on-error: true + + - name: wait_for_nodes + # only runs if we don't have a post-db + if: ${{ inputs.post_db_url == '' }} + id: wait_for_nodes + shell: bash + env: + NETWORK: ${{ inputs.network }} + TS_LOG_CONSOLE: true + TS_LOG_LEVEL: debug + run: | + # Wait for nodes to come online + just zb wait-for-nodes $ZOMBIE_BITE_BASE_PATH + continue-on-error: true + + - name: upload_logs_on_failure + if: steps.wait_for_nodes.outcome == 'failure' || steps.wait_for_network.outcome == 'failure' + uses: actions/upload-artifact@v4 + with: + name: ${{ inputs.network }}-spawn-failure-logs-${{ github.sha }}${{ inputs.args }} + path: | + ${{ env.ZOMBIE_BITE_BASE_PATH }}/spawn/collator/collator.log + ${{ env.ZOMBIE_BITE_BASE_PATH }}/spawn/alice/alice.log + ${{ env.ZOMBIE_BITE_BASE_PATH }}/spawn/bob/bob.log + ${{ env.ZOMBIE_BITE_BASE_PATH }}/nohup.out + + - name: early_exit_if_spawn_fail + if: steps.wait_for_nodes.outcome == 'failure' + shell: bash + run: exit 1 - name: run_migration + # only runs if we don't have a post-db + if: ${{ inputs.post_db_url == '' }} id: zombie_bite_step_spawn_and_run_migration shell: bash env: NETWORK: ${{ inputs.network }} + PRE_DB_RUN_ID: ${{ inputs.pre_db_run_id }} TS_LOG_CONSOLE: true TS_LOG_LEVEL: debug run: | @@ -201,33 +298,18 @@ jobs: 
ALICE_PORT=$(jq -r .alice_port ${ZOMBIE_BITE_BASE_PATH}/ports.json) COL_PORT=$(jq -r .collator_port ${ZOMBIE_BITE_BASE_PATH}/ports.json) - # Wait for nodes to come online - echo "::group::wait-for-nodes" - just zb wait-for-nodes $ZOMBIE_BITE_BASE_PATH - echo "::endgroup::" - echo "::group::run-migration" - # Start both pre and post-migration snapshot monitoring before scheduling migration - echo "Starting pre-migration snapshot monitoring..." - nohup node dist/zombie-bite-scripts/migration_snapshot.js $ZOMBIE_BITE_BASE_PATH $NETWORK pre > pre_snapshot.log 2>&1 & - PRE_SNAPSHOT_PID=$! - - echo "Starting post-migration snapshot monitoring..." - nohup node dist/zombie-bite-scripts/migration_snapshot.js $ZOMBIE_BITE_BASE_PATH $NETWORK post > post_snapshot.log 2>&1 & - POST_SNAPSHOT_PID=$! + # Start migration snapshot monitoring (polls for completion and takes all 4 snapshots) + echo "Starting migration snapshot monitoring in background..." + nohup node dist/zombie-bite-scripts/migration_snapshot.js $ZOMBIE_BITE_BASE_PATH $NETWORK monitOnly 2>&1 | tee -a migration_snapshot.log & + SNAPSHOT_PID=$! # Schedule the migration node dist/zombie-bite-scripts/migration_shedule_migration.js $ALICE_PORT - # Monitor migration completion (this waits until both chains reach MigrationDone) - node dist/zombie-bite-scripts/migration_finished_monitor.js $ZOMBIE_BITE_BASE_PATH $ALICE_PORT $COL_PORT - - # Wait for both snapshot processes to complete - echo "Waiting for pre-migration snapshot to complete..." - wait $PRE_SNAPSHOT_PID || echo "Pre-snapshot process already completed" - - echo "Waiting for post-migration snapshot to complete..." - wait $POST_SNAPSHOT_PID || echo "Post-snapshot process already completed" + # Wait for snapshot process to complete (polls for MigrationDone, scans blocks, takes snapshots) + echo "Waiting for migration to complete and snapshots to finish..." 
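+ # NOTE: `wait` surfaces the monitor's exit status and fails if the process already exited, hence the `|| echo` guard on the next line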
+ wait $SNAPSHOT_PID || echo "Snapshot process already completed" echo "::endgroup::" @@ -258,18 +340,9 @@ jobs: echo "::endgroup::" continue-on-error: true - - name: try_runtime_upload_step_pre - uses: actions/upload-artifact@v4 - with: - name: ${{ inputs.network }}-try-runtime-snaps-pre-${{ github.sha }}${{ inputs.args }} - path: | - ${{ env.ZOMBIE_BITE_BASE_PATH }}/${{ inputs.network }}-rc-pre.snap - ${{ env.ZOMBIE_BITE_BASE_PATH }}/${{ inputs.network }}-ah-pre.snap - ${{ env.ZOMBIE_BITE_BASE_PATH }}/pre_migration_snapshot_info.json - ${{ env.ZOMBIE_BITE_BASE_PATH }}/pre_migration_snapshot_done.txt - ${{ env.ZOMBIE_BITE_BASE_PATH }}/pre_snapshot.log - - name: checkpoint_spawn + # only runs if we don't have a post-db + if: ${{ inputs.post_db_url == '' }} shell: bash run: | echo "::group::debug" @@ -278,18 +351,9 @@ jobs: ls ${{ env.ZOMBIE_BITE_BASE_PATH }} echo "::endgroup::" - - name: try_runtime_upload_step_post - uses: actions/upload-artifact@v4 - with: - name: ${{ inputs.network }}-try-runtime-snaps-post-${{ github.sha }}${{ inputs.args }} - path: | - ${{ env.ZOMBIE_BITE_BASE_PATH }}/${{ inputs.network }}-rc-post.snap - ${{ env.ZOMBIE_BITE_BASE_PATH }}/${{ inputs.network }}-ah-post.snap - ${{ env.ZOMBIE_BITE_BASE_PATH }}/post_migration_snapshot_info.json - ${{ env.ZOMBIE_BITE_BASE_PATH }}/post_migration_snapshot_done.txt - ${{ env.ZOMBIE_BITE_BASE_PATH }}/post_snapshot.log - - name: zombie_bite_upload_step_logs + # only runs if we don't have a post-db + if: ${{ inputs.post_db_url == '' }} uses: actions/upload-artifact@v4 with: name: ${{ inputs.network }}-post-migration-logs-${{ github.sha }}${{ inputs.args }} @@ -299,6 +363,8 @@ jobs: ${{ env.ZOMBIE_BITE_BASE_PATH }}/spawn-debug/bob/bob.log - name: zombie_bite_upload_step + # only runs if we don't have a post-db + if: ${{ inputs.post_db_url == '' }} uses: actions/upload-artifact@v4 with: name: ${{ inputs.network }}-post-migration-db-${{ github.sha }}${{ inputs.args }} @@ -308,15 +374,16 @@ jobs: ${{ env.ZOMBIE_BITE_BASE_PATH }}/ready.json ${{ env.ZOMBIE_BITE_BASE_PATH }}/migration_done.json - - name: try_runtime_rust_test - id: try_runtime_rust_test - shell: bash + - name: download_post_migration_db + # only runs if we have a post-db + if: ${{ inputs.post_db_url != '' }} + uses: ./.github/actions/download-post-migration-db env: - NETWORK: ${{ inputs.network }} - run: | - export PATH=${AHM_BINS}:$PATH - just ahm rust-test $NETWORK $ZOMBIE_BITE_BASE_PATH - continue-on-error: true + ARTIFACT_URL: ${{ inputs.post_db_url }} + GH_TOKEN: ${{ github.token }} + with: + destination-path: ${{ env.ZOMBIE_BITE_BASE_PATH }} + artifact-url: ${{ env.ARTIFACT_URL }} - name: run_zombie_bite_post shell: bash @@ -324,10 +391,56 @@ jobs: export PATH=${AHM_BINS}:$PATH # remove previous nohup.out rm nohup.out || true - nohup just zb spawn $ZOMBIE_BITE_BASE_PATH post > nohup.out 2>&1 & + nohup just zb spawn $ZOMBIE_BITE_BASE_PATH post > $ZOMBIE_BITE_BASE_PATH/nohup.out 2>&1 & - name: wait_for_network + id: wait_for_network_post uses: ./.github/actions/wait-zb-network-ready + with: + log-file: ${{ env.ZOMBIE_BITE_BASE_PATH }}/nohup.out + continue-on-error: true + + - name: upload_on_fail + if: steps.wait_for_network_post.outcome == 'failure' + uses: actions/upload-artifact@v4 + with: + name: ${{ inputs.network }}-spawn-post-failure-logs-${{ github.sha }} + path: | + ${{ env.ZOMBIE_BITE_BASE_PATH }}/nohup.out + + - name: take_try_runtime_snaps + shell: bash + env: + NETWORK: ${{ inputs.network }} + TS_LOG_CONSOLE: true + TS_LOG_LEVEL: debug + run: | + export 
PATH=${AHM_BINS}:$PATH + node dist/zombie-bite-scripts/migration_snapshot.js $ZOMBIE_BITE_BASE_PATH $NETWORK + continue-on-error: true + + - name: try_runtime_upload_step_snapshots + uses: actions/upload-artifact@v4 + with: + name: ${{ inputs.network }}-try-runtime-snaps-${{ github.sha }}${{ inputs.args }} + path: | + ${{ env.ZOMBIE_BITE_BASE_PATH }}/${{ inputs.network }}-rc-pre.snap + ${{ env.ZOMBIE_BITE_BASE_PATH }}/${{ inputs.network }}-ah-pre.snap + ${{ env.ZOMBIE_BITE_BASE_PATH }}/${{ inputs.network }}-rc-post.snap + ${{ env.ZOMBIE_BITE_BASE_PATH }}/${{ inputs.network }}-ah-post.snap + ${{ env.ZOMBIE_BITE_BASE_PATH }}/migration_snapshot_info.json + ${{ env.ZOMBIE_BITE_BASE_PATH }}/migration_snapshot_done.txt + ${{ env.ZOMBIE_BITE_BASE_PATH }}/migration_snapshot.log + + - name: try_runtime_rust_test + id: try_runtime_rust_test + shell: bash + env: + NETWORK: ${{ inputs.network }} + run: | + export PATH=${AHM_BINS}:$PATH + just ahm rust-test $NETWORK $ZOMBIE_BITE_BASE_PATH + continue-on-error: true - name: ts_comparison_tests id: ts_comparison_tests diff --git a/.github/workflows/zombie-bite.yml b/.github/workflows/zombie-bite.yml index 4ad1c69b..f62cb2d9 100644 --- a/.github/workflows/zombie-bite.yml +++ b/.github/workflows/zombie-bite.yml @@ -8,7 +8,7 @@ on: required: false network: description: "Network to use (ALL means Polkadot and Kusama)" - default: "ALL" + default: "polkadot" type: choice options: - ALL @@ -24,31 +24,53 @@ on: required: false type: string default: "" + pre_db_run_id: + description: "Run ID of a previous run to start the process from its pre-db" + required: false + type: string + default: "" + post_db_url: + description: "URL to download a post-db from and start from there" + required: false + type: string + default: "" + doppelganger_version: + description: "Tag of the doppelganger release to use" + required: false + type: string + default: "latest" + runner: + description: "Runner to run the jobs on" + required: false + type: string + default: "parity-large-persistent-test" schedule: - - cron: "0 0 * * *" # Every Day for Kusama - - cron: "0 0 * * 0" # Only on Sundays for Polkadot + - cron: "0 0 * * *" # Every Day for Polkadot concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ inputs.runner }} cancel-in-progress: true permissions: {} jobs: zombie_bite_polkadot: - # run by schedule only in Sundays or by demand - if: ${{ inputs.network == 'ALL' || inputs.network == 'polkadot' || github.event.schedule == '0 0 * * 0' }} + # run by schedule or by demand + if: ${{ inputs.network == 'ALL' || inputs.network == 'polkadot' || github.event.schedule == '0 0 * * *' }} uses: ./.github/workflows/zombie-bite-common.yml with: network: polkadot sudo-key: ${{ inputs.sudo-key }} rc_runtime_override_url: ${{ inputs.rc_runtime_override_url }} ah_runtime_override_url: ${{ inputs.ah_runtime_override_url }} - runner: parity-large-persistent-test + runner: ${{ inputs.runner }} + doppelganger_version: ${{ inputs.doppelganger_version }} + pre_db_run_id: ${{ inputs.pre_db_run_id }} + post_db_url: ${{ inputs.post_db_url }} zombie_bite_kusama: - # run by schedule everyday or by demand - if: ${{ inputs.network == 'ALL' || inputs.network == 'kusama' || github.event.schedule == '0 0 * * *' }} + # run by demand + if: ${{ inputs.network == 'ALL' || inputs.network == 'kusama' }} uses: ./.github/workflows/zombie-bite-common.yml with: network: kusama
rc_runtime_override_url: ${{ inputs.rc_runtime_override_url }} ah_runtime_override_url: ${{ inputs.ah_runtime_override_url }} runner: parity-large-persistent-test - diff --git a/justfile b/justfile index 486616bd..f80cd483 100644 --- a/justfile +++ b/justfile @@ -69,6 +69,7 @@ build runtime: compare-state base_path runtime: just ahm _npm-build + npm run create-migration-done-file {{ base_path }} npm run compare-state {{ base_path }} {{ runtime }} find-rc-block-bite network="kusama": @@ -84,42 +85,42 @@ make-new-snapshot base_path: e2e-tests NETWORK: #!/usr/bin/env bash set -e - + # Validate NETWORK argument if [[ "{{ NETWORK }}" != "kusama" && "{{ NETWORK }}" != "polkadot" ]]; then echo "Error: NETWORK must be one of: kusama, polkadot" exit 1 fi - + # Check required environment variables in PET's .env file NETWORK_UPPER="{{ NETWORK }}" NETWORK_UPPER=${NETWORK_UPPER^^} ENDPOINT_VAR="ASSETHUB${NETWORK_UPPER}_ENDPOINT" BLOCK_VAR="ASSETHUB${NETWORK_UPPER}_BLOCK_NUMBER" - + # Load PET's .env file if it exists. # If not, log that, and run with PET's default. Will cause meaningless test failures if run on an unmigrated network # due to absence of required pallets. if [[ -f "${PET_PATH}/.env" ]]; then source "${PET_PATH}/.env" fi - + if [[ -z "${!ENDPOINT_VAR}" ]]; then echo "Warning: ${ENDPOINT_VAR} environment variable is not set in ${PET_PATH}/.env" echo "Running with default PET endpoint for network {{ NETWORK }} (check PET source code)" fi - + if [[ -z "${!BLOCK_VAR}" ]]; then echo "Warning: ${BLOCK_VAR} environment variable is not set in ${PET_PATH}/.env" echo "Running with default block number for network {{ NETWORK }} (check PET source code)" fi - + echo "Running tests with:" echo " ${ENDPOINT_VAR}=${!ENDPOINT_VAR}" echo " ${BLOCK_VAR}=${!BLOCK_VAR}" - + cd polkadot-ecosystem-tests - + # Install dependencies yarn install @@ -129,13 +130,13 @@ e2e-tests NETWORK: NETWORK_CAPITALIZED="{{ NETWORK }}" NETWORK_CAPITALIZED=${NETWORK_CAPITALIZED^} find packages -name "*assetHub${NETWORK_CAPITALIZED}*e2e*.test.ts" -type f > /tmp/test_list.txt - + # Set up interrupt handler to exit on `CTRL^C` without starting the next set of tests # `pkill -P $$` kills all descendant processes which were spawned by the current process, to avoid leaving # orphaned processes running. # `exit 130` is the standard exit code for `SIGINT` in bash. trap 'echo -e "\nInterrupted. Killing yarn processes and exiting..."; pkill -P $$; exit 130' INT - + while read -r test; do echo "Running E2E test: $test" if ! yarn test "$test" -u; then @@ -145,10 +146,10 @@ e2e-tests NETWORK: test_results="${test_results}✅ Test passed: $test\n" fi done < /tmp/test_list.txt - + # Print results and failure count echo -e "$test_results" echo "Total failed tests: $failed_count" - + # Exit with failed count as exit code exit $failed_count \ No newline at end of file diff --git a/justfiles/zb.justfile b/justfiles/zb.justfile index 5a06e63b..7309622c 100644 --- a/justfiles/zb.justfile +++ b/justfiles/zb.justfile @@ -1,4 +1,5 @@ # Run in the project root + set working-directory := ".." 
_default: help @@ -17,8 +18,8 @@ bite base_path runtime: PATH=$(pwd)/${DOPPELGANGER_PATH}/bin:$PATH \ zombie-bite bite -d {{ base_path }} \ -r {{ runtime }} \ - --rc-override "${RUNTIME_WASM}/{{runtime}}_runtime.compact.compressed.wasm" \ - --ah-override "${RUNTIME_WASM}/asset_hub_{{runtime}}_runtime.compact.compressed.wasm" + --rc-override "${RUNTIME_WASM}/{{ runtime }}_runtime.compact.compressed.wasm" \ + --ah-override "${RUNTIME_WASM}/asset_hub_{{ runtime }}_runtime.compact.compressed.wasm" # Second part of the Zombie-Bite flow. This "spawns" the network with the forked state. spawn base_path *step: @@ -134,8 +135,8 @@ wait-for-nodes base_path: echo "${name} node ready" return 0 fi - echo "Attempt $i/10: ${name} node not ready, waiting 5s..." - sleep 5 + echo "Attempt $i/10: ${name} node not ready, waiting 10s..." + sleep 10 done echo "ERROR: ${name} node failed to become ready after 10 attempts" return 1 @@ -150,20 +151,22 @@ wait-for-nodes base_path: # Wait for both to complete wait $RC_PID + RC_READY_ECODE=$? wait $AH_PID + AH_READY_ECODE=$? - echo "Both nodes are ready" + EXIT_CODE=$((RC_READY_ECODE + AH_READY_ECODE)) + if [[ $EXIT_CODE -eq 0 ]];then + echo "Both nodes are ready"; + else + echo "Node(s) are not ready"; + fi; -# Monitor for AccountsMigrationInit and take pre-migration snapshot -monitor-pre-snapshot base_path network: - #!/usr/bin/env bash - set -xe - just ahm _npm-build - node dist/zombie-bite-scripts/migration_snapshot.js {{ base_path }} {{ network }} pre + exit $EXIT_CODE; -# Monitor for MigrationDone and take post-migration snapshot -monitor-post-snapshot base_path network: +# Monitor migration and take all 4 snapshots (pre/post for RC and AH) after migration completes +monitor-snapshots base_path network: #!/usr/bin/env bash set -xe just ahm _npm-build - node dist/zombie-bite-scripts/migration_snapshot.js {{ base_path }} {{ network }} post + node dist/zombie-bite-scripts/migration_snapshot.js {{ base_path }} {{ network }} diff --git a/package.json b/package.json index 9903387f..9acada38 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ "compare-state": "node dist/migration-tests/index.js", "find-rc-block-bite": "node dist/zombie-bite-scripts/find_rc_block_bite.js", "make-new-snapshot": "node dist/zombie-bite-scripts/make_new_snapshot.js", + "create-migration-done-file": "node dist/zombie-bite-scripts/create_migration_done_file.js", "ahm": "node dist/zombie-bite-scripts/orchestrator.js", "prettier": "prettier --write .", "clean": "rm -rf dist", diff --git a/runtimes b/runtimes index a1f1b221..3039b0c8 160000 --- a/runtimes +++ b/runtimes @@ -1 +1 @@ -Subproject commit a1f1b2211180ccfe593da5653791c0f7e07b9488 +Subproject commit 3039b0c88ace9471e75a6da4efd378817ee56e34 diff --git a/zombie-bite-scripts/create_migration_done_file.ts b/zombie-bite-scripts/create_migration_done_file.ts new file mode 100644 index 00000000..e250a4f0 --- /dev/null +++ b/zombie-bite-scripts/create_migration_done_file.ts @@ -0,0 +1,21 @@ +/* + * Wrapper to call the `createMigrationDoneFile` fn (defined in helpers) + * accepts 3 positional arguments + * base_path: the path where we will write the migration end info (rc_block_end, ah_block_end) in a json file + * rc_port: The port to use to connect to alice + * ah_port: The port to use to connect to the collator + */ + +import { createMigrationDoneFile } from "./helpers.js"; +import { logger } from "../shared/logger.js"; + +const main = async () => { + let base_path = process.argv[2]; + let rc_port = process.argv[3]; + let ah_port 
= process.argv[4]; + logger.info("🔎 Creating migration_done.json file..."); + await createMigrationDoneFile(base_path, rc_port, ah_port); + process.exit(0); +}; + +main().catch(console.log); \ No newline at end of file diff --git a/zombie-bite-scripts/helpers.ts b/zombie-bite-scripts/helpers.ts index 3967416c..0e015fed 100644 --- a/zombie-bite-scripts/helpers.ts +++ b/zombie-bite-scripts/helpers.ts @@ -1,7 +1,8 @@ import { ApiPromise, WsProvider, Keyring } from "@polkadot/api"; import { cryptoWaitReady } from "@polkadot/util-crypto"; -import { promises as fs_promises } from "fs"; +import { promises as fs_promises, readFileSync } from "fs"; import { logger } from "../shared/logger.js"; +import { join } from "path"; const rcPort = process.env.ZOMBIE_BITE_ALICE_PORT || 63168; const ahPort = process.env.ZOMBIE_BITE_AH_PORT || 63170; @@ -44,6 +45,55 @@ export async function delay(ms: number) { return new Promise((resolve) => setTimeout(resolve, ms)); } +export async function getAHPort(basePath: string): Promise<number> { + try { + const portsFile = join(basePath, "ports.json"); + const ports = JSON.parse(readFileSync(portsFile, "utf8")); + return ports.collator_port; + } catch (error) { + logger.error("Could not read ports.json, using default port 63170"); + return 63170; + } +} + +export async function getRCPort(basePath: string): Promise<number> { + try { + const portsFile = join(basePath, "ports.json"); + const ports = JSON.parse(readFileSync(portsFile, "utf8")); + return ports.alice_port; + } catch (error) { + logger.error("Could not read ports.json, using default port 63168"); + return 63168; + } +} + +export async function createMigrationDoneFile( + base_path?: string, + rc_port?: string | number, + ah_port?: string | number, +) { + const base_path_to_use = base_path || "."; + const rc_uri = `ws://localhost:${rc_port || rcPort}`; + const ah_uri = `ws://localhost:${ah_port || ahPort}`; + + const rc_api = await connect(rc_uri); + const ah_api = await connect(ah_uri); + + const rc_end_block = await rc_api.query.rcMigrator.migrationEndBlock(); + const ah_end_block = await ah_api.query.ahMigrator.migrationEndBlock(); + + const content = { + rc_finish_block: rc_end_block.toJSON(), + ah_finish_block: ah_end_block.toJSON(), + }; + + await fs_promises.writeFile( + `${base_path_to_use}/migration_done.json`, + JSON.stringify(content), + ); + return content; +} + export async function monitMigrationFinish( base_path?: string, rc_port?: string | number, @@ -83,6 +133,19 @@ function migration_done(stage: any) { return JSON.stringify(stage) == '"MigrationDone"'; } +export function isAccountsMigrationInit(stage: any): boolean { + return JSON.stringify(stage) === '"AccountsMigrationInit"'; +} + +export function isDataMigrationOngoing(stage: any): boolean { + return JSON.stringify(stage) === '"DataMigrationOngoing"'; +} + +export function isCoolOff(stage: any): boolean { + const stageStr = JSON.stringify(stage); + return stageStr ? 
stageStr.includes('"CoolOff"') : false; +} + async function rc_check(uri: string) { return new Promise(async (resolve) => { logger.info('Checking RC migration status', { uri }); @@ -238,6 +301,7 @@ export async function checkScheduleMigrationCallStatus(atBlock: string, status: } } + export async function scheduleMigration(migration_args?: scheduleMigrationArgs) { logger.info('migration_args', migration_args); const rc_uri = `ws://localhost:${migration_args && migration_args.rc_port || rcPort}`; @@ -258,10 +322,16 @@ export async function scheduleMigration(migration_args?: scheduleMigrationArgs) finalization = migration_args && migration_args.finalization ? true : false; + // use current as default + let migration_final_args = [start, warm_up_end, cool_off_end, ignore_staking_check]; + // if it is an old pre-db, use the old two-argument call (compare run IDs numerically, not lexicographically) + if (process.env["PRE_DB_RUN_ID"] && Number(process.env["PRE_DB_RUN_ID"]) <= 16895620428) migration_final_args = [start, cool_off_end]; + logger.info('Scheduling migration', { start, warm_up_end, cool_off_end, ignore_staking_check, nonce, finalization }); + logger.info('Final args to pass to scheduleMigration call', migration_final_args); return new Promise(async (resolve, reject) => { - const unsub: any = await api.tx.rcMigrator.scheduleMigration(start, warm_up_end, cool_off_end, ignore_staking_check) + const unsub: any = await api.tx.rcMigrator.scheduleMigration(...migration_final_args) .signAndSend(alice, { nonce: nonce, era: 0 }, (result) => { logger.info('Migration transaction status', { status: result.status.toString() }); diff --git a/zombie-bite-scripts/migration_snapshot.ts b/zombie-bite-scripts/migration_snapshot.ts index b45c9797..a50191f2 100644 --- a/zombie-bite-scripts/migration_snapshot.ts +++ b/zombie-bite-scripts/migration_snapshot.ts @@ -1,522 +1,456 @@ -// Monitor RC and AH migration stages and take pre/post-migration snapshots +// Take migration snapshots by scanning blocks after migration completes // -// Pre-migration: Monitors RC for AccountsMigrationInit, then finds the corresponding AH block -// Post-migration: Monitors both RC and AH in parallel, waits for both to reach MigrationDone +// Strategy: +// 1. Poll (via RPC) until both chains reach MigrationDone +// 2. Scan BACKWARDS from current block to find post-migration blocks (CoolOff) +// 3. Scan FORWARDS from migration start to find pre-migration blocks +// - RC: AccountsMigrationInit +// - AH: DataMigrationOngoing + 1 +// 4. Take all 4 snapshots when nodes are idle // -// Usage: node dist/zombie-bite-scripts/migration_pre_snapshot.js <base_path> <network> <pre|post> +// Usage: node dist/zombie-bite-scripts/migration_snapshot.js <base_path> <network> [monitOnly] import { ApiPromise, WsProvider } from "@polkadot/api"; -import type { Header } from "@polkadot/types/interfaces"; import { readFileSync, writeFileSync } from "fs"; import { join } from "path"; import { exec } from "child_process"; import { promisify } from "util"; import { logger } from "../shared/logger.js"; +import { + isAccountsMigrationInit, + isDataMigrationOngoing, + isCoolOff, + getAHPort, + getRCPort, +} from "./helpers.js"; const execAsync = promisify(exec); -// Get base path, network, and snapshot type from command line args const basePath = process.argv[2]; const network = process.argv[3]; -const snapshotType = process.argv[4]; // 'pre' or 'post' +// If the optional third argument is set, the script exits right after the migration is done (without taking snapshots).
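+// e.g. the CI workflow invokes it as: node dist/zombie-bite-scripts/migration_snapshot.js $ZOMBIE_BITE_BASE_PATH $NETWORK monitOnly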
+const onlyMonit = process.argv[4]; -if (!basePath || !network || !snapshotType) { - logger.error( - "Usage: node migration_snapshot.js <base_path> <network> <pre|post>", - ); +if (!basePath || !network) { + logger.error("Usage: node migration_snapshot.js <base_path> <network> [monitOnly]"); process.exit(1); } -if (snapshotType !== "pre" && snapshotType !== "post") { - logger.error("Error: snapshot type must be 'pre' or 'post'"); - process.exit(1); -} -async function getAHPort(basePath: string): Promise<number> { - try { - const portsFile = join(basePath, "ports.json"); - const ports = JSON.parse(readFileSync(portsFile, "utf8")); - return ports.collator_port; - } catch (error) { - logger.error("Could not read ports.json, using default port 63170"); - return 63170; // Default AH port - } + +interface SnapshotBlocks { + rcPreBlock: string; + rcPreBlockNumber: number; + ahPreBlock: string; + ahPreBlockNumber: number; + rcPostBlock: string; + rcPostBlockNumber: number; + ahPostBlock: string; + ahPostBlockNumber: number; } -async function getRCPort(basePath: string): Promise<number> { +// Create a fresh API connection for a single query, then disconnect +async function queryOnce<T>( + port: number, + query: (api: ApiPromise) => Promise<T>, +): Promise<T> { + const provider = new WsProvider(`ws://127.0.0.1:${port}`, 1000, {}, 5000); + const api = await ApiPromise.create({ provider }); try { - const portsFile = join(basePath, "ports.json"); - const ports = JSON.parse(readFileSync(portsFile, "utf8")); - return ports.alice_port; - } catch (error) { - logger.error("Could not read ports.json, using default port 63168"); - return 63168; // Default RC port + return await query(api); + } finally { + await api.disconnect(); } } -// Check if stage is AccountsMigrationInit -function isAccountsMigrationInit(stage: any): boolean { - // Handle both string and object representations - if (typeof stage === "string") { - return stage === "AccountsMigrationInit"; - } +// Poll until both chains reach MigrationDone +async function waitForMigrationDone( + rcPort: number, + ahPort: number, +): Promise<void> { + logger.info("Polling for migration completion..."); - // Handle object representation - if (typeof stage === "object" && stage !== null) { - return ( - "AccountsMigrationInit" in stage || - JSON.stringify(stage).includes("AccountsMigrationInit") - ); - } + const pollInterval = (process.env["AHM_BINS"] ? 
1 : 5) * 60 * 1000; // 1 min in CI / 5 min by default + const maxWaitTime = 12 * 60 * 60 * 1000; // 12 hours + const startTime = Date.now(); - return false; -} + let rcDone = false; + let ahDone = false; -// Check if stage is MigrationDone -function isMigrationDone(stage: any): boolean { - // Handle both string and object representations - if (typeof stage === "string") { - return stage === "MigrationDone"; - } + while (!rcDone || !ahDone) { + if (Date.now() - startTime > maxWaitTime) { + throw new Error("Timeout waiting for migration to complete (12 hours)"); + } - // Handle object representation - if (typeof stage === "object" && stage !== null) { - return ( - "MigrationDone" in stage || - JSON.stringify(stage).includes("MigrationDone") - ); + try { + // Check RC migration stage + if (!rcDone) { + const [rcStage, atBlock] = await queryOnce(rcPort, async (api) => { + const block = await api.query.system.number(); + const stage = await api.query.rcMigrator.rcMigrationStage(); + return [stage.toHuman(), block]; + }); + logger.info(`RC[#${atBlock}] migration stage: ${JSON.stringify(rcStage)}`); + + if (JSON.stringify(rcStage) === '"MigrationDone"') { + logger.info("✅ RC migration complete!"); + rcDone = true; + } + } + + // Check AH migration stage + if (!ahDone) { + const [ahStage, atBlock] = await queryOnce(ahPort, async (api) => { + const block = await api.query.system.number(); + const stage = await api.query.ahMigrator.ahMigrationStage(); + return [stage.toHuman(), block]; + }); + logger.info(`AH[#${atBlock}] migration stage: ${JSON.stringify(ahStage)}`); + + if (JSON.stringify(ahStage) === '"MigrationDone"') { + logger.info("✅ AH migration complete!"); + ahDone = true; + } + } + + if (!rcDone || !ahDone) { + logger.info(`Waiting ${pollInterval / 1000}s before next poll...`); + await new Promise((resolve) => setTimeout(resolve, pollInterval)); + } + } catch (error) { + logger.warn("Error polling migration status:", error); + logger.info(`Retrying in ${pollInterval / 1000}s...`); + await new Promise((resolve) => setTimeout(resolve, pollInterval)); + } } - return false; + logger.info("🎉 Both chains reached MigrationDone!"); } -// Find the AH block that corresponds to a given RC block by looking at parachain backing -async function findCorrespondingAHBlock( - rcApi: ApiPromise, - ahApi: ApiPromise, - rcBlockHash: string, -): Promise<string> { - logger.info(`Looking for AH block backed in RC block ${rcBlockHash}`); - - // Get the RC block to examine which parachain blocks were backed - const rcBlock = await rcApi.rpc.chain.getBlock(rcBlockHash); - const rcHeader = rcBlock.block.header; +// Scan backwards from endBlock to startBlock to find first block matching predicate +async function scanBackwards( + port: number, + endBlock: number, + startBlock: number, + predicate: (stage: any) => boolean, + chainName: string, + targetName: string, +): Promise<{ blockNumber: number; blockHash: string }> { + logger.info( + `Scanning ${chainName} backwards from block ${endBlock} to ${startBlock} for ${targetName}...`, + ); - // Look for the AH block with validation data pointing to this RC block or nearby - let ahCurrentHash = await ahApi.rpc.chain.getFinalizedHead(); - let attempts = 0; - const maxAttempts = 50; // Look back further + for (let blockNum = endBlock; blockNum >= startBlock; blockNum--) { + if (blockNum % 100 === 0) { + logger.debug(`${chainName} scanning block ${blockNum}...`); + } - while (attempts < maxAttempts) { try { - const ahBlock = await ahApi.rpc.chain.getBlock(ahCurrentHash); - - // Look for 
setValidationData extrinsic in this AH block - for (const extrinsic of ahBlock.block.extrinsics) { - const { method } = extrinsic; - - if ( - method.section === "parachainSystem" && - method.method === "setValidationData" - ) { - const args = method.args[0] as any; - const relayParentNumber = - args.validationData.relayParentNumber.toNumber(); - const rcBlockNumber = rcHeader.number.toNumber(); - - logger.info( - `Checking AH block ${ahCurrentHash}: validation parent = ${relayParentNumber}, target RC block = ${rcBlockNumber}`, - ); - - if (relayParentNumber === rcBlockNumber) { - logger.info( - `✅ Found exact matching AH block at ${ahCurrentHash} (AH validation parent: ${relayParentNumber}, RC block: ${rcBlockNumber})`, - ); - return ahCurrentHash.toString(); - } - } + const result = await queryOnce(port, async (api) => { + const hash = await api.rpc.chain.getBlockHash(blockNum); + const apiAt = await api.at(hash); + const stage = + chainName === "RC" + ? await apiAt.query.rcMigrator.rcMigrationStage() + : await apiAt.query.ahMigrator.ahMigrationStage(); + + return { + hash: hash.toString(), + stage: stage.toHuman(), + }; + }); + + if (predicate(result.stage)) { + logger.info( + `✅ Found ${chainName} ${targetName} at block ${blockNum} (${result.hash})`, + ); + return { blockNumber: blockNum, blockHash: result.hash }; } } catch (error) { - // Silent retry on error + logger.warn(`Error scanning block ${blockNum}:`, error); + // Continue scanning } - - // Move to parent AH block - const ahHeader = await ahApi.rpc.chain.getHeader(ahCurrentHash); - ahCurrentHash = ahHeader.parentHash; - attempts++; } throw new Error( - `Failed to find synchronized AH block after ${maxAttempts} attempts. ` + - `Searched for AH block with validation parent == ${rcHeader.number.toNumber()} or ${rcHeader.number.toNumber() - 1}. ` + - `This indicates a synchronization issue between RC and AH chains. 
` + `RC block: ${rcBlockHash}, RC block number: ${rcHeader.number.toNumber()}`, ); } -// Take snapshots at a specific block hash -async function takeSnapshotsAtBlock( - basePath: string, - network: string, - rcPort: number, - ahPort: number, - blockHash: string, - type: "pre" | "post", -): Promise<void> { +// Scan forwards from startBlock to endBlock to find first block matching predicate +async function scanForwards( + port: number, + startBlock: number, + endBlock: number, + predicate: (stage: any) => boolean, + chainName: string, + targetName: string, +): Promise<{ blockNumber: number; blockHash: string }> { logger.info( - `Taking ${type}-migration snapshots at block ${blockHash} for ${network}...`, + `Scanning ${chainName} forwards from block ${startBlock} to ${endBlock} for ${targetName}...`, ); - logger.info(`Using ports - RC: ${rcPort}, AH: ${ahPort}`); - try { - const rcSnapshotPath = `${basePath}/${network}-rc-${type}.snap`; - const ahSnapshotPath = `${basePath}/${network}-ah-${type}.snap`; - - // Take RC snapshot at the exact block where AccountsMigrationInit was detected - const rcCommand = `try-runtime create-snapshot --uri ws://127.0.0.1:${rcPort} --at ${blockHash} "${rcSnapshotPath}"`; - logger.info(`Executing: ${rcCommand}`); - await execAsync(rcCommand); - - // We look at the RC block to see which AH blocks were backed/included - const ahProvider = new WsProvider(`ws://127.0.0.1:${ahPort}`); - const ahApi = await ApiPromise.create({ provider: ahProvider }); - const rcProvider = new WsProvider(`ws://127.0.0.1:${rcPort}`); - const rcApi = await ApiPromise.create({ provider: rcProvider }); - - logger.info(`🔍 Finding corresponding AH block for RC block...`); - - let ahTargetHash: string; - let ahStage: any; - - if (type === "pre") { - // For pre-migration: find AH block that corresponds to the RC block - ahTargetHash = await findCorrespondingAHBlock(rcApi, ahApi, blockHash); - } else { - // For post-migration: AH monitoring is handled in main() - this should not be reached - throw new Error( - "Post-migration AH snapshot should be handled by parallel monitoring, not here", - ); + for (let blockNum = startBlock; blockNum <= endBlock; blockNum++) { + if (blockNum % 100 === 0) { + logger.debug(`${chainName} scanning block ${blockNum}...`); } - // Get the AH migration stage at the chosen block - const ahApiAt = await ahApi.at(ahTargetHash); - const ahStageRaw = await ahApiAt.query.ahMigrator.ahMigrationStage(); - ahStage = ahStageRaw.toHuman(); - const ahHeader = await ahApi.rpc.chain.getHeader(ahTargetHash); - - logger.info( - `🔍 Final AH Migration Stage at snapshot: ${JSON.stringify(ahStage)}`, - ); - logger.info( - `📍 AH snapshot at block #${ahHeader.number.toNumber()}: ${ahTargetHash.toString()}`, - ); - - await rcApi.disconnect(); - - // Use the found AH block for the snapshot - const ahCommand = `try-runtime create-snapshot --uri ws://127.0.0.1:${ahPort} --at ${ahTargetHash} "${ahSnapshotPath}"`; - logger.info(`Executing: ${ahCommand}`); - await execAsync(ahCommand); - - await ahApi.disconnect(); - - // Write snapshot info for reference - const snapshotInfo = { - [`rc_${type}_snapshot_path`]: rcSnapshotPath, - [`ah_${type}_snapshot_path`]: ahSnapshotPath, - rc_block_hash: blockHash, - ah_block_hash: ahTargetHash.toString(), - ah_migration_stage: ahStage, - network: network, - timestamp: new Date().toISOString(), - trigger: - type === "pre" - ? 
"RC AccountsMigrationInit detected, AH snapshot at corresponding block" - : "RC MigrationDone detected, AH snapshot at corresponding MigrationDone block", - }; - - const infoPath = join(basePath, `${type}_migration_snapshot_info.json`); - writeFileSync(infoPath, JSON.stringify(snapshotInfo, null, 2)); - - logger.info(`${type}-migration snapshots completed successfully!`); - logger.info(`RC snapshot: ${rcSnapshotPath}`); - logger.info(`AH snapshot: ${ahSnapshotPath}`); - logger.info(`Info written to: ${infoPath}`); - } catch (error) { - logger.error("Failed to take snapshots at block:", error); - throw error; + try { + const result = await queryOnce(port, async (api) => { + const hash = await api.rpc.chain.getBlockHash(blockNum); + const apiAt = await api.at(hash); + const stage = + chainName === "RC" + ? await apiAt.query.rcMigrator.rcMigrationStage() + : await apiAt.query.ahMigrator.ahMigrationStage(); + + return { + hash: hash.toString(), + stage: stage.toHuman(), + }; + }); + + if (predicate(result.stage)) { + logger.info( + `✅ Found ${chainName} ${targetName} at block ${blockNum} (${result.hash})`, + ); + return { blockNumber: blockNum, blockHash: result.hash }; + } + } catch (error) { + logger.warn(`Error scanning block ${blockNum}:`, error); + // Continue scanning + } } -} -async function main() { - const ahPort = await getAHPort(basePath); - const rcPort = await getRCPort(basePath); - - logger.info(`Using ports - RC: ${rcPort}, AH: ${ahPort}`); - - // Connect to relay chain to monitor migration stage - const wsProvider = new WsProvider(`ws://127.0.0.1:${rcPort}`); - const api = await ApiPromise.create({ provider: wsProvider }); - - logger.info( - `Starting to monitor RC migration stage for ${network} (${snapshotType} snapshots)...`, + throw new Error( + `Could not find ${chainName} ${targetName} between blocks ${startBlock}-${endBlock}`, ); - - if (snapshotType === "pre") { - // Pre-migration: Monitor RC only, find corresponding AH block when RC reaches AccountsMigrationInit - await monitorRCForPreMigration(api, basePath, network, rcPort, ahPort); - } else { - // Post-migration: Monitor both RC and AH simultaneously - await monitorBothChainsForPostMigration( - api, - basePath, - network, - rcPort, - ahPort, - ); - } } -// Monitor RC for pre-migration (AccountsMigrationInit) -async function monitorRCForPreMigration( - api: ApiPromise, - basePath: string, - network: string, +async function findSnapshotBlocks( rcPort: number, ahPort: number, -): Promise { - let snapshotTaken = false; - - // Set a timeout to prevent hanging indefinitely - const timeout = setTimeout( - () => { - if (!snapshotTaken) { - logger.error( - `⏰ Timeout waiting for AccountsMigrationInit stage (10min)`, - ); - process.exit(1); - } - }, - 10 * 60 * 1000, +): Promise { + logger.info("Finding snapshot blocks via RPC queries..."); + + // Get current block numbers (after migration is done) + const rcCurrentBlock = await queryOnce(rcPort, async (api) => { + const header = await api.rpc.chain.getHeader(); + return header.number.toNumber(); + }); + logger.info(`RC current block: ${rcCurrentBlock}`); + + const ahCurrentBlock = await queryOnce(ahPort, async (api) => { + const header = await api.rpc.chain.getHeader(); + return header.number.toNumber(); + }); + logger.info(`AH current block: ${ahCurrentBlock}`); + + // Get migration start blocks + const rcStartBlockNum = await queryOnce(rcPort, async (api) => { + const startBlock = await api.query.rcMigrator.migrationStartBlock(); + if (startBlock.isEmpty) { + throw new 
Error("RC migration start block not found"); + } + return (startBlock as any).unwrap().toNumber(); + }); + logger.info(`RC migration started at block: ${rcStartBlockNum}`); + + const ahStartBlockNum = await queryOnce(ahPort, async (api) => { + const startBlock = await api.query.ahMigrator.migrationStartBlock(); + if (startBlock.isEmpty) { + throw new Error("AH migration start block not found"); + } + return (startBlock as any).unwrap().toNumber(); + }); + logger.info(`AH migration started at block: ${ahStartBlockNum}`); + + // 1. Scan backwards from current to find post-migration blocks (CoolOff) + logger.info("=== Finding post-migration blocks (CoolOff) ==="); + const rcPost = await scanBackwards( + rcPort, + rcCurrentBlock, + rcStartBlockNum, + isCoolOff, + "RC", + "CoolOff (post-migration)", ); - const unsub = await api.rpc.chain.subscribeFinalizedHeads( - async (header: Header) => { - if (snapshotTaken) return; + const ahPost = await scanBackwards( + ahPort, + ahCurrentBlock, + ahStartBlockNum, + isCoolOff, + "AH", + "CoolOff (post-migration)", + ); - try { - const apiAt = await api.at(header.hash); - const raw = await apiAt.query.rcMigrator.rcMigrationStage(); - const stage = raw.toHuman(); + // 2. Scan forwards from start to find pre-migration blocks + logger.info("=== Finding pre-migration blocks ==="); + const rcPre = await scanForwards( + rcPort, + rcStartBlockNum, + rcCurrentBlock, + isAccountsMigrationInit, + "RC", + "AccountsMigrationInit (pre-migration)", + ); - logger.info( - `Block #${header.number}: Migration stage = ${JSON.stringify(stage)}`, - ); + // For AH, find DataMigrationOngoing first, then use next block + const ahDataMigrationOngoing = await scanForwards( + ahPort, + ahStartBlockNum, + ahCurrentBlock, + isDataMigrationOngoing, + "AH", + "DataMigrationOngoing", + ); - if (isAccountsMigrationInit(stage)) { - logger.info( - `🎯 AccountsMigrationInit stage detected at block ${header.number}!`, - ); - logger.info(`Taking pre-migration snapshots at exact block...`); - - snapshotTaken = true; - - try { - await takeSnapshotsAtBlock( - basePath, - network, - rcPort, - ahPort, - header.hash.toString(), - "pre", - ); - - const markerFile = join( - basePath, - "pre_migration_snapshot_done.txt", - ); - writeFileSync( - markerFile, - `Pre-migration snapshot completed at block ${header.number}\nTimestamp: ${new Date().toISOString()}`, - ); - - logger.info( - `✅ pre-migration snapshot process completed successfully!`, - ); - } catch (error) { - logger.error(`❌ Failed to take pre-migration snapshots:`, error); - process.exit(1); - } - - clearTimeout(timeout); - await unsub(); - await api.disconnect(); - process.exit(0); - } - } catch (error) { - logger.error("Error checking migration stage:", error); - } - }, + // Get the block after DataMigrationOngoing + const ahPreBlockNum = ahDataMigrationOngoing.blockNumber + 1; + const ahPreHash = await queryOnce(ahPort, async (api) => { + const hash = await api.rpc.chain.getBlockHash(ahPreBlockNum); + return hash.toString(); + }); + logger.info( + `✅ Found AH pre-migration block (DataMigrationOngoing + 1) at block ${ahPreBlockNum} (${ahPreHash})`, ); + + return { + rcPreBlock: rcPre.blockHash, + rcPreBlockNumber: rcPre.blockNumber, + ahPreBlock: ahPreHash, + ahPreBlockNumber: ahPreBlockNum, + rcPostBlock: rcPost.blockHash, + rcPostBlockNumber: rcPost.blockNumber, + ahPostBlock: ahPost.blockHash, + ahPostBlockNumber: ahPost.blockNumber, + }; } -// Monitor both RC and AH for post-migration (both MigrationDone) -async function 
monitorBothChainsForPostMigration( - rcApi: ApiPromise, - basePath: string, - network: string, +async function takeAllSnapshots( rcPort: number, ahPort: number, + blocks: SnapshotBlocks, ): Promise<void> { + logger.info("🎯 Taking all 4 snapshots..."); + + // Take RC pre-migration snapshot + const rcPrePath = `${basePath}/${network}-rc-pre.snap`; logger.info( - `Starting parallel monitoring for both RC and AH MigrationDone states...`, + `Taking RC pre-migration snapshot at block ${blocks.rcPreBlockNumber} (${blocks.rcPreBlock})...`, ); + await execAsync( + `try-runtime create-snapshot --uri ws://127.0.0.1:${rcPort} --at ${blocks.rcPreBlock} "${rcPrePath}" 2>/dev/null`, + ); + logger.info(`✅ RC pre-migration snapshot completed: ${rcPrePath}`); - const ahProvider = new WsProvider(`ws://127.0.0.1:${ahPort}`); - const ahApi = await ApiPromise.create({ provider: ahProvider }); - - let rcDone = false; - let ahDone = false; - let rcDoneBlock: string | null = null; - let ahDoneBlock: string | null = null; - let snapshotTaken = false; - - // Set a timeout to prevent hanging indefinitely. - // If MIGRATION_TIMEOUT_HOURS is not set, default to 2h. - const timeoutHours = process.env.MIGRATION_TIMEOUT_HOURS - ? parseInt(process.env.MIGRATION_TIMEOUT_HOURS) - : 2; - const timeoutMs = timeoutHours * 60 * 60 * 1000; - - const timeout = setTimeout(() => { - if (!snapshotTaken) { - logger.error( - `⏰ Timeout waiting for both chains to reach MigrationDone (${timeoutHours} hours)`, - ); - process.exit(1); - } - }, timeoutMs); - - // Monitor RC for MigrationDone - const rcUnsub = await rcApi.rpc.chain.subscribeFinalizedHeads( - async (header: Header) => { - if (rcDone || snapshotTaken) return; - - try { - const apiAt = await rcApi.at(header.hash); - const raw = await apiAt.query.rcMigrator.rcMigrationStage(); - const stage = raw.toHuman(); - - logger.info( - `RC Block #${header.number}: Migration stage = ${JSON.stringify(stage)}`, - ); + // Take AH pre-migration snapshot + const ahPrePath = `${basePath}/${network}-ah-pre.snap`; + logger.info( + `Taking AH pre-migration snapshot at block ${blocks.ahPreBlockNumber} (${blocks.ahPreBlock})...`, + ); + await execAsync( + `try-runtime create-snapshot --uri ws://127.0.0.1:${ahPort} --at ${blocks.ahPreBlock} "${ahPrePath}" 2>/dev/null`, + ); + logger.info(`✅ AH pre-migration snapshot completed: ${ahPrePath}`); - if (isMigrationDone(stage)) { - logger.info( - `✅ RC MigrationDone detected at block ${header.number}!`, - ); - rcDone = true; - rcDoneBlock = header.hash.toString(); + // Take RC post-migration snapshot + const rcPostPath = `${basePath}/${network}-rc-post.snap`; + logger.info( + `Taking RC post-migration snapshot at block ${blocks.rcPostBlockNumber} (${blocks.rcPostBlock})...`, + ); + await execAsync( + `try-runtime create-snapshot --uri ws://127.0.0.1:${rcPort} --at ${blocks.rcPostBlock} "${rcPostPath}" 2>/dev/null`, + ); + logger.info(`✅ RC post-migration snapshot completed: ${rcPostPath}`); - if (ahDone && ahDoneBlock) { - await takePostMigrationSnapshots(); - } - } - } catch (error) { - logger.error("Error checking RC migration stage:", error); - } - }, + // Take AH post-migration snapshot + const ahPostPath = `${basePath}/${network}-ah-post.snap`; + logger.info( + `Taking AH post-migration snapshot at block ${blocks.ahPostBlockNumber} (${blocks.ahPostBlock})...`, + ); + await execAsync( + `try-runtime create-snapshot --uri ws://127.0.0.1:${ahPort} --at ${blocks.ahPostBlock} "${ahPostPath}" 2>/dev/null`, + ); + logger.info(`✅ AH post-migration snapshot 
completed: ${ahPostPath}`); + + // Write snapshot info + const snapshotInfo = { + rc_pre_snapshot_path: rcPrePath, + rc_pre_block_hash: blocks.rcPreBlock, + rc_pre_block_number: blocks.rcPreBlockNumber, + ah_pre_snapshot_path: ahPrePath, + ah_pre_block_hash: blocks.ahPreBlock, + ah_pre_block_number: blocks.ahPreBlockNumber, + rc_post_snapshot_path: rcPostPath, + rc_post_block_hash: blocks.rcPostBlock, + rc_post_block_number: blocks.rcPostBlockNumber, + ah_post_snapshot_path: ahPostPath, + ah_post_block_hash: blocks.ahPostBlock, + ah_post_block_number: blocks.ahPostBlockNumber, + network: network, + timestamp: new Date().toISOString(), + }; + + const infoPath = join(basePath, "migration_snapshot_info.json"); + writeFileSync(infoPath, JSON.stringify(snapshotInfo, null, 2)); + + const markerFile = join(basePath, "migration_snapshot_done.txt"); + writeFileSync( + markerFile, + `All migration snapshots completed\nTimestamp: ${new Date().toISOString()}`, ); - // Monitor AH for MigrationDone - const ahUnsub = await ahApi.rpc.chain.subscribeFinalizedHeads( - async (header: Header) => { - if (ahDone || snapshotTaken) return; + logger.info("🎉 All snapshots completed successfully!"); + logger.info(`Info written to: ${infoPath}`); +} - try { - const apiAt = await ahApi.at(header.hash); - const raw = await apiAt.query.ahMigrator.ahMigrationStage(); - const stage = raw.toHuman(); +async function main() { + const ahPort = await getAHPort(basePath); + const rcPort = await getRCPort(basePath); - logger.info( - `AH Block #${header.number}: Migration stage = ${JSON.stringify(stage)}`, - ); + logger.info(`Using ports - RC: ${rcPort}, AH: ${ahPort}`); - if (isMigrationDone(stage)) { - logger.info( - `✅ AH MigrationDone detected at block ${header.number}!`, - ); - ahDone = true; - ahDoneBlock = header.hash.toString(); + let msg = "Strategy: Poll for MigrationDone"; + if (onlyMonit === undefined) { + msg = `${msg}, then scan for snapshot blocks.`; + } + logger.info(msg); - if (rcDone && rcDoneBlock) { - await takePostMigrationSnapshots(); - } - } - } catch (error) { - logger.error("Error checking AH migration stage:", error); - } - }, - ); + try { + // 1. Poll until migration is done (no WebSocket subscriptions!) + await waitForMigrationDone(rcPort, ahPort); - // Take snapshots when both chains are done - async function takePostMigrationSnapshots() { - if (snapshotTaken) return; - snapshotTaken = true; + // If we are only monitoring, exit. + if (onlyMonit) { + process.exit(0); + } + + // 2. Find all snapshot blocks by scanning + const blocks = await findSnapshotBlocks(rcPort, ahPort); + logger.info("=== Snapshot blocks found ==="); + logger.info( + `RC pre: block ${blocks.rcPreBlockNumber} (${blocks.rcPreBlock})`, + ); logger.info( - `🎯 Both chains reached MigrationDone! 
Taking post-migration snapshots...`, + `AH pre: block ${blocks.ahPreBlockNumber} (${blocks.ahPreBlock})`, + ); + logger.info( + `RC post: block ${blocks.rcPostBlockNumber} (${blocks.rcPostBlock})`, + ); + logger.info( + `AH post: block ${blocks.ahPostBlockNumber} (${blocks.ahPostBlock})`, ); - try { - const rcSnapshotPath = `${basePath}/${network}-rc-post.snap`; - const ahSnapshotPath = `${basePath}/${network}-ah-post.snap`; - - // Take snapshots at the blocks where each chain reached MigrationDone - const rcCommand = `try-runtime create-snapshot --uri ws://127.0.0.1:${rcPort} --at ${rcDoneBlock} "${rcSnapshotPath}"`; - logger.info(`Executing: ${rcCommand}`); - await execAsync(rcCommand); - - const ahCommand = `try-runtime create-snapshot --uri ws://127.0.0.1:${ahPort} --at ${ahDoneBlock} "${ahSnapshotPath}"`; - logger.info(`Executing: ${ahCommand}`); - await execAsync(ahCommand); - - // Write snapshot info - const snapshotInfo = { - rc_post_snapshot_path: rcSnapshotPath, - ah_post_snapshot_path: ahSnapshotPath, - rc_block_hash: rcDoneBlock, - ah_block_hash: ahDoneBlock, - ah_migration_stage: "MigrationDone", - network: network, - timestamp: new Date().toISOString(), - trigger: "Both RC and AH reached MigrationDone - parallel monitoring", - }; - - const infoPath = join(basePath, "post_migration_snapshot_info.json"); - writeFileSync(infoPath, JSON.stringify(snapshotInfo, null, 2)); - - const markerFile = join(basePath, "post_migration_snapshot_done.txt"); - writeFileSync( - markerFile, - `Post-migration snapshot completed\nTimestamp: ${new Date().toISOString()}`, - ); - - logger.info(`post-migration snapshots completed successfully!`); - logger.info(`RC snapshot: ${rcSnapshotPath}`); - logger.info(`AH snapshot: ${ahSnapshotPath}`); - logger.info(`Info written to: ${infoPath}`); - } catch (error) { - logger.error(`❌ Failed to take post-migration snapshots:`, error); - process.exit(1); - } + // 3. Take all snapshots + await takeAllSnapshots(rcPort, ahPort, blocks); - clearTimeout(timeout); - await rcUnsub(); - await ahUnsub(); - await rcApi.disconnect(); - await ahApi.disconnect(); process.exit(0); + } catch (error) { + logger.error("❌ Failed to create snapshots:", error); + process.exit(1); } } diff --git a/zombie-bite-scripts/wait_n_blocks.ts b/zombie-bite-scripts/wait_n_blocks.ts index b01dbbce..96df92d3 100644 --- a/zombie-bite-scripts/wait_n_blocks.ts +++ b/zombie-bite-scripts/wait_n_blocks.ts @@ -22,6 +22,7 @@ export async function waitNBlocks(endpoint: string, blockCount: number) { return new Promise((resolve) => { const unsub = api.rpc.chain.subscribeFinalizedHeads((header) => { + console.log(`new block: ${header.number.toNumber()} from endpoint ${endpoint}`); blocksReceived++; if (blocksReceived >= blockCount) { @@ -36,7 +37,7 @@ export async function waitNBlocks(endpoint: string, blockCount: number) { unsub.then(unsubFn => unsubFn()); api.disconnect(); - resolve(); + return resolve(); } }); }); @@ -59,8 +60,16 @@ async function main() { process.exit(1); } - await waitNBlocks(endpoint, blockCount); + const timeout = new Promise((_, reject) => { + setTimeout(() => { + reject(); + }, 30 * 1000); // 30s timeout + }); + + await Promise.race([waitNBlocks(endpoint, blockCount), timeout]); + process.exit(0); } catch (error) { + console.error(JSON.stringify(error)); process.exit(1); } }