From 4a2643b6528a5786b6ecf85092c03eb9b4b08c1b Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Tue, 22 Apr 2025 21:24:33 -0400 Subject: [PATCH 1/4] remove branches kw that probably isn't needed --- .github/workflows/main.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e70e029f7..3d7bb44a1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -3,7 +3,6 @@ name: Generate tables on BigQuery on: release: types: [published] - branches: ["main"] jobs: create-tables: From 06f2b3bf0b0c0726bb1c38da7a948b6ed554e33f Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Tue, 22 Apr 2025 21:26:52 -0400 Subject: [PATCH 2/4] drop all tables on the derived schema first --- mimic-iv/concepts/make_concepts.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mimic-iv/concepts/make_concepts.sh b/mimic-iv/concepts/make_concepts.sh index 9afdf5b02..2004d6f12 100644 --- a/mimic-iv/concepts/make_concepts.sh +++ b/mimic-iv/concepts/make_concepts.sh @@ -6,6 +6,17 @@ export TARGET_DATASET=mimiciv_derived # note: max_rows=1 *displays* only one row, but all rows are inserted into the destination table BQ_OPTIONS='--quiet --headless --max_rows=0 --use_legacy_sql=False --replace' +# drop the existing tables in the target dataset +for TABLE in `bq ls physionet-data:${TARGET_DATASET} | cut -d' ' -f3`; +do + # skip the first line of dashes + if [[ "${TABLE:0:2}" == '--' ]]; then + continue + fi + echo "Dropping table ${TARGET_DATASET}.${TABLE}" + bq rm -f -q ${TARGET_DATASET}.${TABLE} +done + # generate a few tables first as the desired order isn't alphabetical for table_path in demographics/icustay_times; do From 97c7be54c704ced80aff26e7aef221f1321d0a82 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Tue, 22 Apr 2025 21:47:42 -0400 Subject: [PATCH 3/4] add copy concepts to version specific schema --- .github/workflows/main.yml | 12 +++++- .../copy_concepts_to_versioned_schema.sh | 37 +++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 mimic-iv/concepts/copy_concepts_to_versioned_schema.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3d7bb44a1..f17aa428a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -2,7 +2,10 @@ name: Generate tables on BigQuery on: release: - types: [published] + types: [released] + +env: + MIMIC_IV_VERSION: 3_1 jobs: create-tables: @@ -24,4 +27,9 @@ jobs: run: | echo "Generating tables on BigQuery" cd mimic-iv/concepts - bash make_concepts.sh \ No newline at end of file + bash make_concepts.sh + + - name: Copy to release specific schema + run: | + echo "Copying tables to release specific schema: mimiciv_${MIMIC_IV_VERSION}_derived" + bash mimic-iv/concepts/copy_concepts_to_versioned_schema.sh ${MIMIC_IV_VERSION} \ No newline at end of file diff --git a/mimic-iv/concepts/copy_concepts_to_versioned_schema.sh b/mimic-iv/concepts/copy_concepts_to_versioned_schema.sh new file mode 100644 index 000000000..21541fb2d --- /dev/null +++ b/mimic-iv/concepts/copy_concepts_to_versioned_schema.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# This script copies the concepts in the BigQuery table mimiciv_derived to mimiciv_${VERSION}_derived. +if [ -z "$$1" ]; then + echo "Usage: $0 " + exit 1 +fi +export SOURCE_DATASET=mimiciv_derived +export TARGET_DATASET=mimiciv_$1_derived + +# check if the target dataset exists +if bq ls | grep -q ${TARGET_DATASET}; then + echo "Using existing dataset ${TARGET_DATASET}." + # drop the existing tables in the target dataset + # this includes ones which may not be in the source dataset + for TABLE in `bq ls physionet-data:${TARGET_DATASET} | cut -d' ' -f3`; + do + # skip the first line of dashes + if [[ "${TABLE:0:2}" == '--' ]]; then + continue + fi + echo "Dropping table ${TARGET_DATASET}.${TABLE}" + bq rm -f -q ${TARGET_DATASET}.${TABLE} + done +else + echo "Creating dataset ${TARGET_DATASET}" + bq mk --dataset ${TARGET_DATASET} +fi + +for TABLE in `bq ls physionet-data:${SOURCE_DATASET} | cut -d' ' -f3`; +do + # skip the first line of dashes + if [[ "${TABLE:0:2}" == '--' ]]; then + continue + fi + echo "${SOURCE_DATASET}.${TABLE} -> ${TARGET_DATASET}.${TABLE}" + bq cp -f -q ${SOURCE_DATASET}.${TABLE} ${TARGET_DATASET}.${TABLE} +done From 181841ad4aec192d40fa427f4f69481b22744308 Mon Sep 17 00:00:00 2001 From: Alistair Johnson Date: Tue, 22 Apr 2025 22:08:34 -0400 Subject: [PATCH 4/4] fix to target physionet-data --- .../copy_concepts_to_versioned_schema.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mimic-iv/concepts/copy_concepts_to_versioned_schema.sh b/mimic-iv/concepts/copy_concepts_to_versioned_schema.sh index 21541fb2d..c012f6575 100644 --- a/mimic-iv/concepts/copy_concepts_to_versioned_schema.sh +++ b/mimic-iv/concepts/copy_concepts_to_versioned_schema.sh @@ -6,32 +6,32 @@ if [ -z "$$1" ]; then fi export SOURCE_DATASET=mimiciv_derived export TARGET_DATASET=mimiciv_$1_derived +export PROJECT_ID=physionet-data # check if the target dataset exists -if bq ls | grep -q ${TARGET_DATASET}; then +if bq ls --datasets --project_id ${PROJECT_ID} | grep -q ${TARGET_DATASET}; then echo "Using existing dataset ${TARGET_DATASET}." # drop the existing tables in the target dataset # this includes ones which may not be in the source dataset - for TABLE in `bq ls physionet-data:${TARGET_DATASET} | cut -d' ' -f3`; + for TABLE in `bq ls ${PROJECT_ID}:${TARGET_DATASET} | cut -d' ' -f3`; do # skip the first line of dashes if [[ "${TABLE:0:2}" == '--' ]]; then continue fi - echo "Dropping table ${TARGET_DATASET}.${TABLE}" - bq rm -f -q ${TARGET_DATASET}.${TABLE} + bq rm -f -q ${PROJECT_ID}:${TARGET_DATASET}.${TABLE} done else - echo "Creating dataset ${TARGET_DATASET}" - bq mk --dataset ${TARGET_DATASET} + echo "Creating dataset ${PROJECT_ID}:${TARGET_DATASET}" + bq mk --dataset ${PROJECT_ID}:${TARGET_DATASET} fi -for TABLE in `bq ls physionet-data:${SOURCE_DATASET} | cut -d' ' -f3`; +echo "Copying tables from ${SOURCE_DATASET} to ${TARGET_DATASET}." +for TABLE in `bq ls ${PROJECT_ID}:${SOURCE_DATASET} | cut -d' ' -f3`; do # skip the first line of dashes if [[ "${TABLE:0:2}" == '--' ]]; then continue fi - echo "${SOURCE_DATASET}.${TABLE} -> ${TARGET_DATASET}.${TABLE}" - bq cp -f -q ${SOURCE_DATASET}.${TABLE} ${TARGET_DATASET}.${TABLE} + bq cp -f -q ${PROJECT_ID}:${SOURCE_DATASET}.${TABLE} ${PROJECT_ID}:${TARGET_DATASET}.${TABLE} done