add current/ folder mirroring and switch gsutil to gcloud storage #484
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow "CD": generates the index artifacts, builds the distribution, and
# (for published releases) attaches/uploads the artifacts and publishes to PyPI.
name: CD

# Triggers: manual dispatch, every pull request, pushes to main, and
# published releases.
on:
  workflow_dispatch:
  pull_request:
  push:
    branches: [main]
  release:
    types: [published]

# At most one active run per workflow + ref; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Workflow-wide environment, visible to every job and step.
env:
  FORCE_COLOR: "3"

jobs:
# Job: generate-indices
# Produces release_artifacts/ and publishes it as the "idc-index-artifacts"
# workflow artifact that the downstream jobs download.
  generate-indices:
    name: Generate index artifacts
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install db-dtypes duckdb google-cloud-bigquery pandas pyarrow requests

      - name: Authorize Google Cloud
        uses: google-github-actions/auth@v2
        with:
          credentials_json: "${{ secrets.SERVICE_ACCOUNT_KEY }}"
          create_credentials_file: true
          export_environment_variables: true

      # NOTE(review): GCP_PROJECT is not defined anywhere in this workflow;
      # presumably it is exported by the auth step above
      # (export_environment_variables: true) - confirm.
      - name: Execute SQL Query and Generate Parquet Files
        run: |
          python scripts/python/idc_index_data_manager.py \
            --generate-parquet \
            --output-dir release_artifacts
        env:
          GCP_PROJECT: ${{ env.GCP_PROJECT }}

      # The generator writes gdc_idc_mapping.parquet to the working directory;
      # it is moved next to the other artifacts so it is uploaded with them.
      - name: Generate GDC patient check parquet
        run: |
          python scripts/gdc/gdc_parquet_generator.py
          mv gdc_idc_mapping.parquet release_artifacts/
        env:
          GCP_PROJECT: ${{ env.GCP_PROJECT }}

      # Writes a Markdown table of per-file sizes plus the total to the job
      # summary. The total is computed once and reused for both the log and
      # the summary, and the summary is written through a single quoted
      # redirect instead of one unquoted append per line.
      - name: Report generated file sizes
        run: |
          total=$(du -ch release_artifacts/*.parquet \
            | tail -1 \
            | awk '{print "Total parquet size: " $1}')
          echo "$total"
          {
            echo "## Generated Index File Sizes"
            echo ""
            echo "| File | Size |"
            echo "|------|------|"
            for f in release_artifacts/*.parquet; do
              size=$(ls -lh "$f" | awk '{print $5}')
              name=$(basename "$f")
              echo "| $name | $size |"
            done
            echo ""
            echo "### Total"
            echo "$total"
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Upload index artifacts
        uses: actions/upload-artifact@v4
        with:
          name: idc-index-artifacts
          path: release_artifacts/
          retention-days: 14
          if-no-files-found: error
# Job: dist
# Builds the distribution package from the artifacts produced by
# generate-indices and uploads it as the "Packages" workflow artifact.
  dist:
    name: Distribution build
    runs-on: ubuntu-latest
    needs: [generate-indices]
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Download pre-generated index artifacts
        uses: actions/download-artifact@v4
        with:
          name: idc-index-artifacts
          path: release_artifacts/

      # Parquet files are mandatory: the build step below disables
      # regeneration, so a silent failure here would yield a package without
      # data. json/sql/zip files are moved only when present.
      - name: Move artifacts to project root for packaging
        run: |
          dest=./src/idc_index_data
          # With nullglob an unmatched pattern expands to an empty array
          # instead of the literal pattern string.
          shopt -s nullglob

          parquet_files=(release_artifacts/*.parquet)
          if (( ${#parquet_files[@]} == 0 )); then
            echo "::error::No parquet files found in release_artifacts/"
            exit 1
          fi
          mv "${parquet_files[@]}" "$dest/"

          for ext in json sql zip; do
            extra_files=(release_artifacts/*."$ext")
            if (( ${#extra_files[@]} > 0 )); then
              mv "${extra_files[@]}" "$dest/"
            fi
          done

          ls -la "$dest"

      - name: Build distribution package
        run: |
          # Unset GCP_PROJECT to prevent build hook from regenerating files
          # Files already exist from the generate-indices job
          unset GCP_PROJECT
          python -m pip install --upgrade build
          python -m build
        env:
          # Explicitly blank GCP_PROJECT to skip data generation in build hook
          GCP_PROJECT: ""

      # Fail instead of warn when dist/ is empty, matching the index upload.
      - name: Upload distribution artifacts
        uses: actions/upload-artifact@v4
        with:
          name: Packages
          path: dist/
          retention-days: 14
          if-no-files-found: error

      - name: Inspect package contents
        run: |
          python -m pip install --upgrade check-wheel-contents
          ls -lh dist/
          check-wheel-contents dist/*.whl
# Job: attach-to-release
# Runs only for published releases; attaches the generated parquet/json/sql
# files to the GitHub release that triggered the run.
  attach-to-release:
    name: Attach artifacts to release
    needs: [generate-indices]
    runs-on: ubuntu-latest
    if: github.event_name == 'release' && github.event.action == 'published'
    # Uploading release assets needs write access to repository contents;
    # declared explicitly so the job does not depend on the repository's
    # default GITHUB_TOKEN permissions.
    permissions:
      contents: write
    steps:
      - name: Download index artifacts
        uses: actions/download-artifact@v4
        with:
          name: idc-index-artifacts
          path: release_artifacts/

      # allowUpdates: the release already exists (it triggered this run), so
      # the action updates it; omitBodyDuringUpdate keeps its description.
      - name: Attach artifacts to release
        uses: ncipollo/release-action@v1
        with:
          artifacts: "release_artifacts/*.parquet,release_artifacts/*.json,release_artifacts/*.sql"
          allowUpdates: true
          omitBodyDuringUpdate: true
# Job: upload-to-gcs
# Runs only for published releases; uploads the artifacts to the
# idc-index-data-artifacts bucket under the release tag, mirrors that prefix
# to current/, and re-applies the bucket's public-read and CORS settings.
  upload-to-gcs:
    name: Upload artifacts to GCS
    needs: [generate-indices]
    runs-on: ubuntu-latest
    if: github.event_name == 'release' && github.event.action == 'published'
    steps:
      - name: Download index artifacts
        uses: actions/download-artifact@v4
        with:
          name: idc-index-artifacts
          path: release_artifacts/

      - name: Authorize Google Cloud
        uses: google-github-actions/auth@v2
        with:
          credentials_json: "${{ secrets.GCS_SERVICE_ACCOUNT_KEY }}"

      # NOTE(review): the action's `parent` input is not set here - confirm
      # whether objects are meant to land under <tag>/release_artifacts/ or
      # directly under <tag>/.
      - name: Upload artifacts to GCS bucket
        uses: google-github-actions/upload-cloud-storage@v2
        with:
          path: release_artifacts
          destination: idc-index-data-artifacts/${{ github.event.release.tag_name }}

      # Mirror the tagged prefix into current/ in one rsync pass: stale
      # objects are removed and new ones copied without first emptying
      # current/, so a failed copy no longer leaves the folder empty.
      # The tag is passed through an environment variable rather than being
      # interpolated into the script text, so it cannot be parsed as shell.
      - name: Update current/ folder with latest release
        env:
          RELEASE_TAG: ${{ github.event.release.tag_name }}
        run: |
          gcloud storage rsync \
            --recursive \
            --delete-unmatched-destination-objects \
            "gs://idc-index-data-artifacts/${RELEASE_TAG}" \
            gs://idc-index-data-artifacts/current

      - name: Ensure bucket is publicly readable
        run: |
          gcloud storage buckets add-iam-policy-binding gs://idc-index-data-artifacts \
            --member=allUsers --role=roles/storage.objectViewer

      # The quoted heredoc delimiter ('CORS') keeps the JSON literal: no shell
      # expansion happens inside it.
      - name: Ensure CORS policy is set
        run: |
          cat > /tmp/cors.json << 'CORS'
          [
            {
              "origin": [
                "https://localhost.gdc.cancer.gov:3010",
                "https://portal.gdc.cancer.gov"
              ],
              "method": ["GET", "HEAD"],
              "responseHeader": ["Content-Type", "Content-Length", "Content-Range"],
              "maxAgeSeconds": 3600
            }
          ]
          CORS
          gcloud storage buckets update gs://idc-index-data-artifacts --cors-file=/tmp/cors.json
# Job: publish
# Runs only for published releases, after the dist job; downloads the
# "Packages" artifact into dist/ and hands it to the PyPI publish action.
  publish:
    name: Publish to PyPI
    runs-on: ubuntu-latest
    needs:
      - dist
    if: github.event_name == 'release' && github.event.action == 'published'
    environment: pypi
    # No API token is passed to the publish action below.
    # NOTE(review): presumably this relies on PyPI trusted publishing via the
    # OIDC token granted here - confirm.
    permissions:
      id-token: write
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: Packages
          path: dist

      - uses: pypa/gh-action-pypi-publish@release/v1