Skip to content

chore: update pre-commit hooks #490

chore: update pre-commit hooks

chore: update pre-commit hooks #490

Workflow file for this run

# Continuous-delivery workflow. It runs on manual dispatch, on pull requests,
# on pushes to main, and when a release is published.
name: CD

on:
  workflow_dispatch:
  pull_request:
  push:
    branches: [main]
  release:
    types: [published]

# Runs are grouped per workflow + ref; starting a new run in the same group
# cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  # Quoted because environment variable values are strings.
  FORCE_COLOR: "3"

jobs:
# Generates the index files into release_artifacts/ and publishes that
# directory as the `idc-index-artifacts` workflow artifact for downstream jobs.
generate-indices:
  name: Generate index artifacts
  runs-on: ubuntu-latest
  steps:
    # fetch-depth: 0 fetches the full history instead of a shallow clone.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.12"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install \
          db-dtypes \
          duckdb \
          google-cloud-bigquery \
          pandas \
          pyarrow \
          requests

    # Writes a credentials file and exports the related environment
    # variables so that later steps in this job can use them.
    - name: Authorize Google Cloud
      uses: google-github-actions/auth@v2
      with:
        credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }}
        create_credentials_file: true
        export_environment_variables: true

    # NOTE(review): GCP_PROJECT is not declared in this workflow's `env`
    # block; presumably it is exported by the auth step above - confirm.
    - name: Execute SQL Query and Generate Parquet Files
      env:
        GCP_PROJECT: ${{ env.GCP_PROJECT }}
      run: |
        python scripts/python/idc_index_data_manager.py --generate-parquet --output-dir release_artifacts

    - name: Generate GDC patient check parquet
      env:
        GCP_PROJECT: ${{ env.GCP_PROJECT }}
      run: |
        python scripts/gdc/gdc_parquet_generator.py
        mv gdc_idc_mapping.parquet release_artifacts/

    # Builds a Markdown table of per-file sizes in the job summary, then
    # reports the combined size both in the log and in the summary.
    - name: Report generated file sizes
      run: |
        {
          echo "## Generated Index File Sizes"
          echo ""
          echo "| File | Size |"
          echo "|------|------|"
          for parquet_path in release_artifacts/*.parquet; do
            human_size=$(ls -lh "$parquet_path" | awk '{print $5}')
            file_name=$(basename "$parquet_path")
            echo "| $file_name | $human_size |"
          done
          echo ""
          echo "### Total"
        } >> "$GITHUB_STEP_SUMMARY"
        # tee -a prints the total to the log and appends it to the summary.
        du -ch release_artifacts/*.parquet \
          | tail -1 \
          | awk '{print "Total parquet size: " $1}' \
          | tee -a "$GITHUB_STEP_SUMMARY"

    # The upload fails the job if release_artifacts/ has no files.
    - name: Upload index artifacts
      uses: actions/upload-artifact@v4
      with:
        name: idc-index-artifacts
        path: release_artifacts/
        if-no-files-found: error
        retention-days: 14
# Builds the Python distribution from the index files produced by the
# generate-indices job and uploads the result as the `Packages` artifact.
dist:
  name: Distribution build
  runs-on: ubuntu-latest
  needs: [generate-indices]
  steps:
    # fetch-depth: 0 fetches the full history instead of a shallow clone.
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Download pre-generated index artifacts
      uses: actions/download-artifact@v4
      with:
        name: idc-index-artifacts
        path: release_artifacts/

    - name: Move artifacts to project root for packaging
      run: |
        # The parquet indices are mandatory. The build below runs with
        # GCP_PROJECT unset so nothing regenerates them; a silently failed
        # move would therefore ship a package without its data files.
        mv release_artifacts/*.parquet ./src/idc_index_data || {
          echo "::error::No parquet index files could be moved into src/idc_index_data"
          exit 1
        }
        # The remaining file types are moved on a best-effort basis.
        mv release_artifacts/*.json ./src/idc_index_data || true
        mv release_artifacts/*.sql ./src/idc_index_data || true
        mv release_artifacts/*.zip ./src/idc_index_data || true
        ls -la ./src/idc_index_data

    - name: Build distribution package
      run: |
        # Unset GCP_PROJECT to prevent build hook from regenerating files
        # Files already exist from the generate-indices job
        unset GCP_PROJECT
        python -m pip install --upgrade build
        python -m build
      env:
        # Explicitly unset GCP_PROJECT to skip data generation in build hook
        GCP_PROJECT: ""

    - name: Upload distribution artifacts
      uses: actions/upload-artifact@v4
      with:
        name: Packages
        path: dist/
        retention-days: 14

    # Lists the built files and runs check-wheel-contents on the wheel(s).
    - name: Inspect package contents
      run: |
        python -m pip install --upgrade check-wheel-contents
        ls -lh dist/
        check-wheel-contents dist/*.whl
# Attaches the generated index files to the GitHub release that triggered
# the run. Only runs for `release` events with action `published`.
attach-to-release:
  name: Attach artifacts to release
  needs: [generate-indices]
  runs-on: ubuntu-latest
  if: github.event_name == 'release' && github.event.action == 'published'
  # Uploading release assets needs write access to repository contents.
  # Declaring it here keeps the job working when the repository's default
  # GITHUB_TOKEN permissions are read-only.
  permissions:
    contents: write
  steps:
    - name: Download index artifacts
      uses: actions/download-artifact@v4
      with:
        name: idc-index-artifacts
        path: release_artifacts/

    # allowUpdates lets the action update the already-published release;
    # omitBodyDuringUpdate leaves the release body untouched while doing so.
    - name: Attach artifacts to release
      uses: ncipollo/release-action@v1
      with:
        artifacts: "release_artifacts/*.parquet,release_artifacts/*.json,release_artifacts/*.sql"
        allowUpdates: true
        omitBodyDuringUpdate: true
# Publishes the generated index files to the idc-index-data-artifacts GCS
# bucket under a folder named after the release tag, mirrors that folder to
# current/, and (re)applies the bucket's public-read and CORS settings.
# Only runs for `release` events with action `published`.
upload-to-gcs:
  name: Upload artifacts to GCS
  needs: [generate-indices]
  runs-on: ubuntu-latest
  if: github.event_name == 'release' && github.event.action == 'published'
  steps:
    - name: Download index artifacts
      uses: actions/download-artifact@v4
      with:
        name: idc-index-artifacts
        path: release_artifacts/

    - name: Authorize Google Cloud
      uses: google-github-actions/auth@v2
      with:
        credentials_json: "${{ secrets.GCS_SERVICE_ACCOUNT_KEY }}"

    - name: Upload artifacts to GCS bucket
      uses: google-github-actions/upload-cloud-storage@v2
      with:
        path: release_artifacts
        destination: idc-index-data-artifacts/${{ github.event.release.tag_name }}

    - name: Update current/ folder with latest release
      env:
        # The tag name is passed through the environment instead of being
        # expanded into the script text, so shell metacharacters in a tag
        # cannot be interpreted as commands.
        RELEASE_TAG: ${{ github.event.release.tag_name }}
      run: |
        # The removal is best-effort (`|| true`), so a failure here does
        # not stop the copy that follows.
        gcloud storage rm -r gs://idc-index-data-artifacts/current/ || true
        # Quoted so the shell passes the wildcard through to gcloud as-is.
        gcloud storage cp -r \
          "gs://idc-index-data-artifacts/${RELEASE_TAG}/*" \
          gs://idc-index-data-artifacts/current/

    - name: Ensure bucket is publicly readable
      run: |
        gcloud storage buckets add-iam-policy-binding gs://idc-index-data-artifacts \
          --member=allUsers --role=roles/storage.objectViewer

    # Writes the CORS policy to a temporary file and applies it to the
    # bucket. The quoted heredoc delimiter keeps the JSON literal.
    - name: Ensure CORS policy is set
      run: |
        cat > /tmp/cors.json << 'CORS'
        [
          {
            "origin": [
              "https://localhost.gdc.cancer.gov:3010",
              "https://portal.gdc.cancer.gov"
            ],
            "method": ["GET", "HEAD"],
            "responseHeader": ["Content-Type", "Content-Length", "Content-Range"],
            "maxAgeSeconds": 3600
          }
        ]
        CORS
        gcloud storage buckets update gs://idc-index-data-artifacts --cors-file=/tmp/cors.json
# Publishes the packages built by the dist job to PyPI.
# Only runs for `release` events with action `published`.
publish:
  name: Publish to PyPI
  needs:
    - dist
  runs-on: ubuntu-latest
  if: github.event_name == 'release' && github.event.action == 'published'
  environment: pypi
  # The job token is granted id-token: write; no other scope is requested.
  permissions:
    id-token: write
  steps:
    # Fetch the `Packages` artifact into dist/.
    - uses: actions/download-artifact@v4
      with:
        name: Packages
        path: dist

    - uses: pypa/gh-action-pypi-publish@release/v1